9563 lines
265 KiB
JSON
9563 lines
265 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 4326,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.008090614886731391,
|
|
"grad_norm": 11.186269453387633,
|
|
"learning_rate": 3.695150115473441e-07,
|
|
"loss": 0.8139,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.8276135325431824,
|
|
"step": 5,
|
|
"valid_targets_mean": 7367.1,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 0.016181229773462782,
|
|
"grad_norm": 11.232529644323693,
|
|
"learning_rate": 8.314087759815243e-07,
|
|
"loss": 0.7781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7600369453430176,
|
|
"step": 10,
|
|
"valid_targets_mean": 8089.4,
|
|
"valid_targets_min": 5145
|
|
},
|
|
{
|
|
"epoch": 0.024271844660194174,
|
|
"grad_norm": 10.15603607573941,
|
|
"learning_rate": 1.2933025404157046e-06,
|
|
"loss": 0.7791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7623764872550964,
|
|
"step": 15,
|
|
"valid_targets_mean": 7736.6,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 0.032362459546925564,
|
|
"grad_norm": 8.428519297500355,
|
|
"learning_rate": 1.7551963048498846e-06,
|
|
"loss": 0.756,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7740650177001953,
|
|
"step": 20,
|
|
"valid_targets_mean": 7745.0,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 0.040453074433656956,
|
|
"grad_norm": 5.849750741120756,
|
|
"learning_rate": 2.217090069284065e-06,
|
|
"loss": 0.6945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7195521593093872,
|
|
"step": 25,
|
|
"valid_targets_mean": 7508.9,
|
|
"valid_targets_min": 3292
|
|
},
|
|
{
|
|
"epoch": 0.04854368932038835,
|
|
"grad_norm": 3.999112186202206,
|
|
"learning_rate": 2.678983833718245e-06,
|
|
"loss": 0.6908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7112835645675659,
|
|
"step": 30,
|
|
"valid_targets_mean": 7365.6,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 0.05663430420711974,
|
|
"grad_norm": 2.3642895753979727,
|
|
"learning_rate": 3.1408775981524254e-06,
|
|
"loss": 0.6478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.7030115127563477,
|
|
"step": 35,
|
|
"valid_targets_mean": 7283.9,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 0.06472491909385113,
|
|
"grad_norm": 1.5453038086692914,
|
|
"learning_rate": 3.6027713625866056e-06,
|
|
"loss": 0.6036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.614423394203186,
|
|
"step": 40,
|
|
"valid_targets_mean": 7289.2,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 0.07281553398058252,
|
|
"grad_norm": 1.2616787513943852,
|
|
"learning_rate": 4.064665127020786e-06,
|
|
"loss": 0.559,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5736663341522217,
|
|
"step": 45,
|
|
"valid_targets_mean": 7387.5,
|
|
"valid_targets_min": 3823
|
|
},
|
|
{
|
|
"epoch": 0.08090614886731391,
|
|
"grad_norm": 1.0884084428534417,
|
|
"learning_rate": 4.526558891454966e-06,
|
|
"loss": 0.5749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.564963161945343,
|
|
"step": 50,
|
|
"valid_targets_mean": 6462.8,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 0.0889967637540453,
|
|
"grad_norm": 0.8836061625007553,
|
|
"learning_rate": 4.988452655889146e-06,
|
|
"loss": 0.577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5905070900917053,
|
|
"step": 55,
|
|
"valid_targets_mean": 9115.1,
|
|
"valid_targets_min": 5927
|
|
},
|
|
{
|
|
"epoch": 0.0970873786407767,
|
|
"grad_norm": 0.8333417163648019,
|
|
"learning_rate": 5.450346420323326e-06,
|
|
"loss": 0.5768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5808031558990479,
|
|
"step": 60,
|
|
"valid_targets_mean": 7797.2,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 0.10517799352750809,
|
|
"grad_norm": 0.7005772125420773,
|
|
"learning_rate": 5.912240184757506e-06,
|
|
"loss": 0.5193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4872642159461975,
|
|
"step": 65,
|
|
"valid_targets_mean": 7330.4,
|
|
"valid_targets_min": 2779
|
|
},
|
|
{
|
|
"epoch": 0.11326860841423948,
|
|
"grad_norm": 0.7219020916672013,
|
|
"learning_rate": 6.374133949191687e-06,
|
|
"loss": 0.499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48490405082702637,
|
|
"step": 70,
|
|
"valid_targets_mean": 7086.4,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 0.12135922330097088,
|
|
"grad_norm": 0.5860056349140036,
|
|
"learning_rate": 6.836027713625867e-06,
|
|
"loss": 0.534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.538621187210083,
|
|
"step": 75,
|
|
"valid_targets_mean": 8391.3,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 0.12944983818770225,
|
|
"grad_norm": 0.6746262711354033,
|
|
"learning_rate": 7.297921478060047e-06,
|
|
"loss": 0.5221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48001986742019653,
|
|
"step": 80,
|
|
"valid_targets_mean": 6497.2,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 0.13754045307443366,
|
|
"grad_norm": 0.5575974584155833,
|
|
"learning_rate": 7.759815242494227e-06,
|
|
"loss": 0.4746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4626394212245941,
|
|
"step": 85,
|
|
"valid_targets_mean": 6541.8,
|
|
"valid_targets_min": 3567
|
|
},
|
|
{
|
|
"epoch": 0.14563106796116504,
|
|
"grad_norm": 0.5396164383780504,
|
|
"learning_rate": 8.221709006928407e-06,
|
|
"loss": 0.449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46605369448661804,
|
|
"step": 90,
|
|
"valid_targets_mean": 7617.8,
|
|
"valid_targets_min": 3885
|
|
},
|
|
{
|
|
"epoch": 0.15372168284789645,
|
|
"grad_norm": 0.6191649901679567,
|
|
"learning_rate": 8.683602771362586e-06,
|
|
"loss": 0.4983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5357075929641724,
|
|
"step": 95,
|
|
"valid_targets_mean": 5710.0,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 0.16181229773462782,
|
|
"grad_norm": 0.47894871102676023,
|
|
"learning_rate": 9.145496535796767e-06,
|
|
"loss": 0.4542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35243353247642517,
|
|
"step": 100,
|
|
"valid_targets_mean": 6085.1,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 0.16990291262135923,
|
|
"grad_norm": 0.4740843869327914,
|
|
"learning_rate": 9.607390300230948e-06,
|
|
"loss": 0.4785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4505811929702759,
|
|
"step": 105,
|
|
"valid_targets_mean": 7739.5,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 0.1779935275080906,
|
|
"grad_norm": 0.46112304946532046,
|
|
"learning_rate": 1.0069284064665128e-05,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4704515337944031,
|
|
"step": 110,
|
|
"valid_targets_mean": 7832.8,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 0.18608414239482202,
|
|
"grad_norm": 0.5109584178686144,
|
|
"learning_rate": 1.0531177829099309e-05,
|
|
"loss": 0.4322,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44879865646362305,
|
|
"step": 115,
|
|
"valid_targets_mean": 6810.3,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 0.1941747572815534,
|
|
"grad_norm": 0.46619850801703333,
|
|
"learning_rate": 1.0993071593533488e-05,
|
|
"loss": 0.4315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4397251009941101,
|
|
"step": 120,
|
|
"valid_targets_mean": 7624.2,
|
|
"valid_targets_min": 3905
|
|
},
|
|
{
|
|
"epoch": 0.2022653721682848,
|
|
"grad_norm": 0.47702679305743545,
|
|
"learning_rate": 1.1454965357967669e-05,
|
|
"loss": 0.4447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43253201246261597,
|
|
"step": 125,
|
|
"valid_targets_mean": 6795.1,
|
|
"valid_targets_min": 3148
|
|
},
|
|
{
|
|
"epoch": 0.21035598705501618,
|
|
"grad_norm": 1.2109949158028912,
|
|
"learning_rate": 1.1916859122401848e-05,
|
|
"loss": 0.6233,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6370628476142883,
|
|
"step": 130,
|
|
"valid_targets_mean": 3099.5,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 0.21844660194174756,
|
|
"grad_norm": 0.9744370820075807,
|
|
"learning_rate": 1.237875288683603e-05,
|
|
"loss": 0.6511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6300196051597595,
|
|
"step": 135,
|
|
"valid_targets_mean": 3709.8,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 0.22653721682847897,
|
|
"grad_norm": 0.902472131901638,
|
|
"learning_rate": 1.284064665127021e-05,
|
|
"loss": 0.6134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5684735774993896,
|
|
"step": 140,
|
|
"valid_targets_mean": 2654.6,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 0.23462783171521034,
|
|
"grad_norm": 0.9072911363668095,
|
|
"learning_rate": 1.3302540415704388e-05,
|
|
"loss": 0.607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6057754755020142,
|
|
"step": 145,
|
|
"valid_targets_mean": 2991.8,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 0.24271844660194175,
|
|
"grad_norm": 0.8517220825207069,
|
|
"learning_rate": 1.3764434180138568e-05,
|
|
"loss": 0.6192,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6334041953086853,
|
|
"step": 150,
|
|
"valid_targets_mean": 3261.2,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 0.25080906148867316,
|
|
"grad_norm": 0.837155020681443,
|
|
"learning_rate": 1.4226327944572749e-05,
|
|
"loss": 0.599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5919010639190674,
|
|
"step": 155,
|
|
"valid_targets_mean": 3676.6,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 0.2588996763754045,
|
|
"grad_norm": 0.8162992185568456,
|
|
"learning_rate": 1.468822170900693e-05,
|
|
"loss": 0.597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5819936990737915,
|
|
"step": 160,
|
|
"valid_targets_mean": 3011.1,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 0.2669902912621359,
|
|
"grad_norm": 0.7935129254194224,
|
|
"learning_rate": 1.5150115473441109e-05,
|
|
"loss": 0.567,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5208292603492737,
|
|
"step": 165,
|
|
"valid_targets_mean": 3071.8,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 0.2750809061488673,
|
|
"grad_norm": 0.8146619781186482,
|
|
"learning_rate": 1.561200923787529e-05,
|
|
"loss": 0.5688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.583451509475708,
|
|
"step": 170,
|
|
"valid_targets_mean": 3090.8,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 0.28317152103559873,
|
|
"grad_norm": 0.7721275818850296,
|
|
"learning_rate": 1.607390300230947e-05,
|
|
"loss": 0.5882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5752245187759399,
|
|
"step": 175,
|
|
"valid_targets_mean": 3369.2,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 0.2912621359223301,
|
|
"grad_norm": 0.7807944253862608,
|
|
"learning_rate": 1.653579676674365e-05,
|
|
"loss": 0.5595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.602877676486969,
|
|
"step": 180,
|
|
"valid_targets_mean": 3657.6,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 0.2993527508090615,
|
|
"grad_norm": 1.237953587348846,
|
|
"learning_rate": 1.699769053117783e-05,
|
|
"loss": 0.5697,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5734540224075317,
|
|
"step": 185,
|
|
"valid_targets_mean": 2946.7,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 0.3074433656957929,
|
|
"grad_norm": 0.7416704166205575,
|
|
"learning_rate": 1.7459584295612013e-05,
|
|
"loss": 0.5392,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5371339321136475,
|
|
"step": 190,
|
|
"valid_targets_mean": 3523.2,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 0.3155339805825243,
|
|
"grad_norm": 0.5535465229823986,
|
|
"learning_rate": 1.792147806004619e-05,
|
|
"loss": 0.5082,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46257442235946655,
|
|
"step": 195,
|
|
"valid_targets_mean": 5521.4,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 0.32362459546925565,
|
|
"grad_norm": 0.7440676720749868,
|
|
"learning_rate": 1.838337182448037e-05,
|
|
"loss": 0.5344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5397250652313232,
|
|
"step": 200,
|
|
"valid_targets_mean": 3396.9,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 0.33171521035598706,
|
|
"grad_norm": 0.7524676198416014,
|
|
"learning_rate": 1.8845265588914552e-05,
|
|
"loss": 0.5443,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.541986346244812,
|
|
"step": 205,
|
|
"valid_targets_mean": 3617.4,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 0.33980582524271846,
|
|
"grad_norm": 0.6640846601991444,
|
|
"learning_rate": 1.9307159353348733e-05,
|
|
"loss": 0.5342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5500484704971313,
|
|
"step": 210,
|
|
"valid_targets_mean": 4753.2,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 0.3478964401294498,
|
|
"grad_norm": 0.7894552351445728,
|
|
"learning_rate": 1.976905311778291e-05,
|
|
"loss": 0.5287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5164601802825928,
|
|
"step": 215,
|
|
"valid_targets_mean": 2988.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 0.3559870550161812,
|
|
"grad_norm": 0.7612981287051088,
|
|
"learning_rate": 2.0230946882217092e-05,
|
|
"loss": 0.5401,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.6017681360244751,
|
|
"step": 220,
|
|
"valid_targets_mean": 3400.9,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 0.3640776699029126,
|
|
"grad_norm": 0.6939836605089136,
|
|
"learning_rate": 2.0692840646651273e-05,
|
|
"loss": 0.5343,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5006556510925293,
|
|
"step": 225,
|
|
"valid_targets_mean": 3745.3,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 0.37216828478964403,
|
|
"grad_norm": 0.6889492438195666,
|
|
"learning_rate": 2.115473441108545e-05,
|
|
"loss": 0.528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48514801263809204,
|
|
"step": 230,
|
|
"valid_targets_mean": 3360.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 0.3802588996763754,
|
|
"grad_norm": 0.7623990047403026,
|
|
"learning_rate": 2.1616628175519635e-05,
|
|
"loss": 0.5487,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.57421875,
|
|
"step": 235,
|
|
"valid_targets_mean": 3318.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.3883495145631068,
|
|
"grad_norm": 0.8608207250476633,
|
|
"learning_rate": 2.2078521939953813e-05,
|
|
"loss": 0.5549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5341051816940308,
|
|
"step": 240,
|
|
"valid_targets_mean": 3564.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 0.3964401294498382,
|
|
"grad_norm": 0.8949184784742432,
|
|
"learning_rate": 2.2540415704387994e-05,
|
|
"loss": 0.5426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5230085849761963,
|
|
"step": 245,
|
|
"valid_targets_mean": 2470.4,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 0.4045307443365696,
|
|
"grad_norm": 0.6957663651549183,
|
|
"learning_rate": 2.3002309468822172e-05,
|
|
"loss": 0.5081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4455685019493103,
|
|
"step": 250,
|
|
"valid_targets_mean": 3419.8,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 0.41262135922330095,
|
|
"grad_norm": 0.6856073914304861,
|
|
"learning_rate": 2.346420323325635e-05,
|
|
"loss": 0.5198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5033816695213318,
|
|
"step": 255,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 0.42071197411003236,
|
|
"grad_norm": 0.8179949015928839,
|
|
"learning_rate": 2.3926096997690534e-05,
|
|
"loss": 0.5441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5254044532775879,
|
|
"step": 260,
|
|
"valid_targets_mean": 2614.4,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 0.42880258899676377,
|
|
"grad_norm": 0.7498900598248376,
|
|
"learning_rate": 2.438799076212471e-05,
|
|
"loss": 0.5174,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5212750434875488,
|
|
"step": 265,
|
|
"valid_targets_mean": 3381.9,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 0.4368932038834951,
|
|
"grad_norm": 0.7504675090521222,
|
|
"learning_rate": 2.4849884526558893e-05,
|
|
"loss": 0.5208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46384745836257935,
|
|
"step": 270,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 0.4449838187702265,
|
|
"grad_norm": 0.7916017536563397,
|
|
"learning_rate": 2.5311778290993074e-05,
|
|
"loss": 0.5024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4946463704109192,
|
|
"step": 275,
|
|
"valid_targets_mean": 3305.2,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 0.45307443365695793,
|
|
"grad_norm": 0.6782862035278908,
|
|
"learning_rate": 2.5773672055427255e-05,
|
|
"loss": 0.4904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4988747835159302,
|
|
"step": 280,
|
|
"valid_targets_mean": 3871.6,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 0.46116504854368934,
|
|
"grad_norm": 0.7600807982882761,
|
|
"learning_rate": 2.6235565819861432e-05,
|
|
"loss": 0.4994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4982724189758301,
|
|
"step": 285,
|
|
"valid_targets_mean": 3910.4,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 0.4692556634304207,
|
|
"grad_norm": 0.5993399825918464,
|
|
"learning_rate": 2.6697459584295617e-05,
|
|
"loss": 0.4788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4518687129020691,
|
|
"step": 290,
|
|
"valid_targets_mean": 4702.8,
|
|
"valid_targets_min": 2246
|
|
},
|
|
{
|
|
"epoch": 0.4773462783171521,
|
|
"grad_norm": 0.7474788095150359,
|
|
"learning_rate": 2.7159353348729794e-05,
|
|
"loss": 0.51,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5318153500556946,
|
|
"step": 295,
|
|
"valid_targets_mean": 3323.4,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 0.4854368932038835,
|
|
"grad_norm": 0.6444802845925507,
|
|
"learning_rate": 2.7621247113163975e-05,
|
|
"loss": 0.4758,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4648396372795105,
|
|
"step": 300,
|
|
"valid_targets_mean": 3920.9,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 0.4935275080906149,
|
|
"grad_norm": 0.6015753165422811,
|
|
"learning_rate": 2.8083140877598153e-05,
|
|
"loss": 0.4764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5057991743087769,
|
|
"step": 305,
|
|
"valid_targets_mean": 4233.6,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 0.5016181229773463,
|
|
"grad_norm": 0.8937976596128937,
|
|
"learning_rate": 2.8545034642032338e-05,
|
|
"loss": 0.5102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5273138284683228,
|
|
"step": 310,
|
|
"valid_targets_mean": 3595.1,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 0.5097087378640777,
|
|
"grad_norm": 0.7060203395582099,
|
|
"learning_rate": 2.9006928406466515e-05,
|
|
"loss": 0.5119,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5150483250617981,
|
|
"step": 315,
|
|
"valid_targets_mean": 3354.8,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 0.517799352750809,
|
|
"grad_norm": 0.6260717983381987,
|
|
"learning_rate": 2.9468822170900696e-05,
|
|
"loss": 0.454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45638102293014526,
|
|
"step": 320,
|
|
"valid_targets_mean": 4103.5,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 0.5258899676375405,
|
|
"grad_norm": 0.6771131697225247,
|
|
"learning_rate": 2.9930715935334874e-05,
|
|
"loss": 0.4654,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.452455073595047,
|
|
"step": 325,
|
|
"valid_targets_mean": 4204.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 0.5339805825242718,
|
|
"grad_norm": 0.6764922910260465,
|
|
"learning_rate": 3.0392609699769055e-05,
|
|
"loss": 0.4769,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4742300808429718,
|
|
"step": 330,
|
|
"valid_targets_mean": 4473.8,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 0.5420711974110033,
|
|
"grad_norm": 0.7962549126746353,
|
|
"learning_rate": 3.0854503464203236e-05,
|
|
"loss": 0.4637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47426992654800415,
|
|
"step": 335,
|
|
"valid_targets_mean": 2837.8,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 0.5501618122977346,
|
|
"grad_norm": 0.5727443646037912,
|
|
"learning_rate": 3.131639722863742e-05,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4242827892303467,
|
|
"step": 340,
|
|
"valid_targets_mean": 4635.1,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 0.558252427184466,
|
|
"grad_norm": 0.7378350621646511,
|
|
"learning_rate": 3.17782909930716e-05,
|
|
"loss": 0.4565,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5066303014755249,
|
|
"step": 345,
|
|
"valid_targets_mean": 3667.0,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 0.5663430420711975,
|
|
"grad_norm": 0.6783611200487234,
|
|
"learning_rate": 3.224018475750577e-05,
|
|
"loss": 0.4616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4323498606681824,
|
|
"step": 350,
|
|
"valid_targets_mean": 4262.1,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 0.5744336569579288,
|
|
"grad_norm": 0.757137324177695,
|
|
"learning_rate": 3.270207852193996e-05,
|
|
"loss": 0.4876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48740166425704956,
|
|
"step": 355,
|
|
"valid_targets_mean": 3191.4,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 0.5825242718446602,
|
|
"grad_norm": 0.6858210837377695,
|
|
"learning_rate": 3.3163972286374135e-05,
|
|
"loss": 0.4397,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43850621581077576,
|
|
"step": 360,
|
|
"valid_targets_mean": 3711.2,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 0.5906148867313916,
|
|
"grad_norm": 0.6287374086435015,
|
|
"learning_rate": 3.3625866050808316e-05,
|
|
"loss": 0.4219,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40141230821609497,
|
|
"step": 365,
|
|
"valid_targets_mean": 4141.0,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 0.598705501618123,
|
|
"grad_norm": 0.6975941132702707,
|
|
"learning_rate": 3.40877598152425e-05,
|
|
"loss": 0.4847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5583797097206116,
|
|
"step": 370,
|
|
"valid_targets_mean": 3920.6,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 0.6067961165048543,
|
|
"grad_norm": 0.7279439914063431,
|
|
"learning_rate": 3.454965357967668e-05,
|
|
"loss": 0.4577,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4639485478401184,
|
|
"step": 375,
|
|
"valid_targets_mean": 2930.4,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 0.6148867313915858,
|
|
"grad_norm": 0.6768702572313738,
|
|
"learning_rate": 3.501154734411086e-05,
|
|
"loss": 0.4622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.490790992975235,
|
|
"step": 380,
|
|
"valid_targets_mean": 3889.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 0.6229773462783171,
|
|
"grad_norm": 0.6487053280504474,
|
|
"learning_rate": 3.547344110854504e-05,
|
|
"loss": 0.4638,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45948219299316406,
|
|
"step": 385,
|
|
"valid_targets_mean": 4169.0,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 0.6310679611650486,
|
|
"grad_norm": 0.882983035029029,
|
|
"learning_rate": 3.5935334872979214e-05,
|
|
"loss": 0.4749,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5022059082984924,
|
|
"step": 390,
|
|
"valid_targets_mean": 2340.9,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 0.63915857605178,
|
|
"grad_norm": 0.6320317979894593,
|
|
"learning_rate": 3.63972286374134e-05,
|
|
"loss": 0.4486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40444374084472656,
|
|
"step": 395,
|
|
"valid_targets_mean": 3383.8,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 0.6472491909385113,
|
|
"grad_norm": 0.5818960322996949,
|
|
"learning_rate": 3.6859122401847576e-05,
|
|
"loss": 0.4295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4237382411956787,
|
|
"step": 400,
|
|
"valid_targets_mean": 4683.7,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 0.6553398058252428,
|
|
"grad_norm": 0.809577266579874,
|
|
"learning_rate": 3.732101616628176e-05,
|
|
"loss": 0.4313,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45975261926651,
|
|
"step": 405,
|
|
"valid_targets_mean": 3994.8,
|
|
"valid_targets_min": 1930
|
|
},
|
|
{
|
|
"epoch": 0.6634304207119741,
|
|
"grad_norm": 0.659659549528883,
|
|
"learning_rate": 3.778290993071594e-05,
|
|
"loss": 0.4204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43026500940322876,
|
|
"step": 410,
|
|
"valid_targets_mean": 3837.7,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 0.6715210355987055,
|
|
"grad_norm": 0.7331448698616245,
|
|
"learning_rate": 3.824480369515012e-05,
|
|
"loss": 0.4221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44799500703811646,
|
|
"step": 415,
|
|
"valid_targets_mean": 3206.0,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 0.6796116504854369,
|
|
"grad_norm": 0.6992820860572108,
|
|
"learning_rate": 3.87066974595843e-05,
|
|
"loss": 0.4319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41871821880340576,
|
|
"step": 420,
|
|
"valid_targets_mean": 3158.6,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 0.6877022653721683,
|
|
"grad_norm": 0.5903634393524475,
|
|
"learning_rate": 3.9168591224018475e-05,
|
|
"loss": 0.4447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4192928969860077,
|
|
"step": 425,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 0.6957928802588996,
|
|
"grad_norm": 0.6935560552905831,
|
|
"learning_rate": 3.963048498845266e-05,
|
|
"loss": 0.4453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4519827365875244,
|
|
"step": 430,
|
|
"valid_targets_mean": 3605.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 0.7038834951456311,
|
|
"grad_norm": 0.6756947997969236,
|
|
"learning_rate": 3.999999348775225e-05,
|
|
"loss": 0.4414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47775375843048096,
|
|
"step": 435,
|
|
"valid_targets_mean": 3628.4,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 0.7119741100323624,
|
|
"grad_norm": 0.7329858551067672,
|
|
"learning_rate": 3.9999765559526296e-05,
|
|
"loss": 0.4066,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40486299991607666,
|
|
"step": 440,
|
|
"valid_targets_mean": 4005.9,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 0.7200647249190939,
|
|
"grad_norm": 0.6595983242797786,
|
|
"learning_rate": 3.999921202315374e-05,
|
|
"loss": 0.4161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3833836317062378,
|
|
"step": 445,
|
|
"valid_targets_mean": 3881.7,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 0.7281553398058253,
|
|
"grad_norm": 0.6727930683168484,
|
|
"learning_rate": 3.9998332887646504e-05,
|
|
"loss": 0.4495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4389573633670807,
|
|
"step": 450,
|
|
"valid_targets_mean": 3740.8,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 0.7362459546925566,
|
|
"grad_norm": 0.9466550805793468,
|
|
"learning_rate": 3.999712816731743e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3840217590332031,
|
|
"step": 455,
|
|
"valid_targets_mean": 3616.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 0.7443365695792881,
|
|
"grad_norm": 0.6217684732379861,
|
|
"learning_rate": 3.999559788178009e-05,
|
|
"loss": 0.4305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41253650188446045,
|
|
"step": 460,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 0.7524271844660194,
|
|
"grad_norm": 0.6248857718707198,
|
|
"learning_rate": 3.999374205594845e-05,
|
|
"loss": 0.3917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30960381031036377,
|
|
"step": 465,
|
|
"valid_targets_mean": 6443.2,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 0.7605177993527508,
|
|
"grad_norm": 0.5872970629177866,
|
|
"learning_rate": 3.999156072003646e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28291448950767517,
|
|
"step": 470,
|
|
"valid_targets_mean": 5332.4,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 0.7686084142394822,
|
|
"grad_norm": 0.47835828651939827,
|
|
"learning_rate": 3.9989053909557576e-05,
|
|
"loss": 0.301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31238192319869995,
|
|
"step": 475,
|
|
"valid_targets_mean": 6187.7,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 0.7766990291262136,
|
|
"grad_norm": 0.4871074224017115,
|
|
"learning_rate": 3.998622166532417e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2571246922016144,
|
|
"step": 480,
|
|
"valid_targets_mean": 5631.6,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 0.7847896440129449,
|
|
"grad_norm": 0.4263025040455046,
|
|
"learning_rate": 3.998306403344688e-05,
|
|
"loss": 0.3239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31412506103515625,
|
|
"step": 485,
|
|
"valid_targets_mean": 7407.4,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 0.7928802588996764,
|
|
"grad_norm": 0.47947323925930374,
|
|
"learning_rate": 3.997958106533383e-05,
|
|
"loss": 0.2648,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2837316691875458,
|
|
"step": 490,
|
|
"valid_targets_mean": 5209.8,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 0.8009708737864077,
|
|
"grad_norm": 0.5128232630032378,
|
|
"learning_rate": 3.997577281768982e-05,
|
|
"loss": 0.2904,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35242921113967896,
|
|
"step": 495,
|
|
"valid_targets_mean": 6012.4,
|
|
"valid_targets_min": 3782
|
|
},
|
|
{
|
|
"epoch": 0.8090614886731392,
|
|
"grad_norm": 0.5145002636153433,
|
|
"learning_rate": 3.997163935251543e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2537648379802704,
|
|
"step": 500,
|
|
"valid_targets_mean": 5490.8,
|
|
"valid_targets_min": 3401
|
|
},
|
|
{
|
|
"epoch": 0.8171521035598706,
|
|
"grad_norm": 0.5086616334315329,
|
|
"learning_rate": 3.996718073710591e-05,
|
|
"loss": 0.3009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30033719539642334,
|
|
"step": 505,
|
|
"valid_targets_mean": 5865.4,
|
|
"valid_targets_min": 4021
|
|
},
|
|
{
|
|
"epoch": 0.8252427184466019,
|
|
"grad_norm": 0.47029052122275783,
|
|
"learning_rate": 3.9962397044050206e-05,
|
|
"loss": 0.2726,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24913957715034485,
|
|
"step": 510,
|
|
"valid_targets_mean": 5680.2,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.4245474236012608,
|
|
"learning_rate": 3.99572883512297e-05,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23057851195335388,
|
|
"step": 515,
|
|
"valid_targets_mean": 5873.6,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 0.8414239482200647,
|
|
"grad_norm": 0.5632911300790845,
|
|
"learning_rate": 3.9951854741816954e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33303922414779663,
|
|
"step": 520,
|
|
"valid_targets_mean": 4735.3,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 0.8495145631067961,
|
|
"grad_norm": 0.5155264290831733,
|
|
"learning_rate": 3.99460963042744e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29260292649269104,
|
|
"step": 525,
|
|
"valid_targets_mean": 5192.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 0.8576051779935275,
|
|
"grad_norm": 0.46935004835095295,
|
|
"learning_rate": 3.994001313235283e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25767964124679565,
|
|
"step": 530,
|
|
"valid_targets_mean": 5253.4,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 0.8656957928802589,
|
|
"grad_norm": 0.49629700003966337,
|
|
"learning_rate": 3.993360532508993e-05,
|
|
"loss": 0.3169,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36085793375968933,
|
|
"step": 535,
|
|
"valid_targets_mean": 5694.6,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 0.8737864077669902,
|
|
"grad_norm": 0.483093900599472,
|
|
"learning_rate": 3.9926872986808626e-05,
|
|
"loss": 0.2831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2917660176753998,
|
|
"step": 540,
|
|
"valid_targets_mean": 5798.9,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 0.8818770226537217,
|
|
"grad_norm": 0.5566582367562767,
|
|
"learning_rate": 3.991981622711542e-05,
|
|
"loss": 0.2738,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2825314998626709,
|
|
"step": 545,
|
|
"valid_targets_mean": 6017.6,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 0.889967637540453,
|
|
"grad_norm": 0.46853180296707475,
|
|
"learning_rate": 3.991243516089859e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3398240804672241,
|
|
"step": 550,
|
|
"valid_targets_mean": 7051.8,
|
|
"valid_targets_min": 3489
|
|
},
|
|
{
|
|
"epoch": 0.8980582524271845,
|
|
"grad_norm": 0.4847865772351801,
|
|
"learning_rate": 3.9904729908326295e-05,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3135841190814972,
|
|
"step": 555,
|
|
"valid_targets_mean": 6098.8,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 0.9061488673139159,
|
|
"grad_norm": 0.5003251836794501,
|
|
"learning_rate": 3.989670059484465e-05,
|
|
"loss": 0.2655,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2598843574523926,
|
|
"step": 560,
|
|
"valid_targets_mean": 4782.9,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 0.9142394822006472,
|
|
"grad_norm": 0.4802100079311373,
|
|
"learning_rate": 3.98883473511757e-05,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2555955648422241,
|
|
"step": 565,
|
|
"valid_targets_mean": 5752.4,
|
|
"valid_targets_min": 3100
|
|
},
|
|
{
|
|
"epoch": 0.9223300970873787,
|
|
"grad_norm": 0.3865047150890052,
|
|
"learning_rate": 3.987967031331523e-05,
|
|
"loss": 0.2452,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2043823003768921,
|
|
"step": 570,
|
|
"valid_targets_mean": 6254.7,
|
|
"valid_targets_min": 2156
|
|
},
|
|
{
|
|
"epoch": 0.93042071197411,
|
|
"grad_norm": 0.5425912868516275,
|
|
"learning_rate": 3.987066962253063e-05,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2928149700164795,
|
|
"step": 575,
|
|
"valid_targets_mean": 5049.7,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 0.9385113268608414,
|
|
"grad_norm": 0.4214568082838799,
|
|
"learning_rate": 3.9861345425358506e-05,
|
|
"loss": 0.2795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2728239893913269,
|
|
"step": 580,
|
|
"valid_targets_mean": 6636.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 0.9466019417475728,
|
|
"grad_norm": 0.6495980738785143,
|
|
"learning_rate": 3.985169787360238e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2546083927154541,
|
|
"step": 585,
|
|
"valid_targets_mean": 5273.4,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 0.9546925566343042,
|
|
"grad_norm": 0.46365767656806356,
|
|
"learning_rate": 3.984172712433016e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27528953552246094,
|
|
"step": 590,
|
|
"valid_targets_mean": 5552.4,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 0.9627831715210357,
|
|
"grad_norm": 0.49542433641132066,
|
|
"learning_rate": 3.983143333987161e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295291066169739,
|
|
"step": 595,
|
|
"valid_targets_mean": 7252.6,
|
|
"valid_targets_min": 4266
|
|
},
|
|
{
|
|
"epoch": 0.970873786407767,
|
|
"grad_norm": 0.4815053281189472,
|
|
"learning_rate": 3.98208166878157e-05,
|
|
"loss": 0.2863,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2819350063800812,
|
|
"step": 600,
|
|
"valid_targets_mean": 5798.0,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 0.9789644012944984,
|
|
"grad_norm": 0.4362018609694083,
|
|
"learning_rate": 3.9809877341007865e-05,
|
|
"loss": 0.2741,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24610409140586853,
|
|
"step": 605,
|
|
"valid_targets_mean": 6265.6,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 0.9870550161812298,
|
|
"grad_norm": 0.5255099169324821,
|
|
"learning_rate": 3.979861547754723e-05,
|
|
"loss": 0.2757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738872766494751,
|
|
"step": 610,
|
|
"valid_targets_mean": 5500.2,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 0.9951456310679612,
|
|
"grad_norm": 0.5814595529767955,
|
|
"learning_rate": 3.9787031280783665e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255471408367157,
|
|
"step": 615,
|
|
"valid_targets_mean": 6825.4,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 1.0032362459546926,
|
|
"grad_norm": 0.521264035103106,
|
|
"learning_rate": 3.977512493931482e-05,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4585817754268646,
|
|
"step": 620,
|
|
"valid_targets_mean": 8109.9,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 1.0113268608414239,
|
|
"grad_norm": 0.4980974525271715,
|
|
"learning_rate": 3.976289664698305e-05,
|
|
"loss": 0.3732,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3889613449573517,
|
|
"step": 625,
|
|
"valid_targets_mean": 7440.1,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 1.0194174757281553,
|
|
"grad_norm": 0.44838727254780897,
|
|
"learning_rate": 3.9750346602872275e-05,
|
|
"loss": 0.3719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36031463742256165,
|
|
"step": 630,
|
|
"valid_targets_mean": 6666.0,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.0275080906148868,
|
|
"grad_norm": 0.511448483931203,
|
|
"learning_rate": 3.973747501130473e-05,
|
|
"loss": 0.368,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29179736971855164,
|
|
"step": 635,
|
|
"valid_targets_mean": 6712.9,
|
|
"valid_targets_min": 2430
|
|
},
|
|
{
|
|
"epoch": 1.035598705501618,
|
|
"grad_norm": 0.48412379318776255,
|
|
"learning_rate": 3.97242820818376e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3734421133995056,
|
|
"step": 640,
|
|
"valid_targets_mean": 7593.6,
|
|
"valid_targets_min": 2385
|
|
},
|
|
{
|
|
"epoch": 1.0436893203883495,
|
|
"grad_norm": 0.5767574946446217,
|
|
"learning_rate": 3.9710768029259695e-05,
|
|
"loss": 0.3601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4075170159339905,
|
|
"step": 645,
|
|
"valid_targets_mean": 8572.8,
|
|
"valid_targets_min": 3812
|
|
},
|
|
{
|
|
"epoch": 1.051779935275081,
|
|
"grad_norm": 0.47192799914503875,
|
|
"learning_rate": 3.9696933073587864e-05,
|
|
"loss": 0.3725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3301822543144226,
|
|
"step": 650,
|
|
"valid_targets_mean": 7005.8,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 1.0598705501618122,
|
|
"grad_norm": 0.6861685062218944,
|
|
"learning_rate": 3.9682777440063455e-05,
|
|
"loss": 0.3764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3679102659225464,
|
|
"step": 655,
|
|
"valid_targets_mean": 7800.5,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 1.0679611650485437,
|
|
"grad_norm": 0.4525527931845314,
|
|
"learning_rate": 3.9668301359148655e-05,
|
|
"loss": 0.337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3416500389575958,
|
|
"step": 660,
|
|
"valid_targets_mean": 7407.8,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 1.0760517799352751,
|
|
"grad_norm": 0.49486536104768347,
|
|
"learning_rate": 3.965350506652272e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34230512380599976,
|
|
"step": 665,
|
|
"valid_targets_mean": 7218.2,
|
|
"valid_targets_min": 3958
|
|
},
|
|
{
|
|
"epoch": 1.0841423948220066,
|
|
"grad_norm": 0.5052558310995379,
|
|
"learning_rate": 3.963838880307815e-05,
|
|
"loss": 0.3595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37526077032089233,
|
|
"step": 670,
|
|
"valid_targets_mean": 6286.2,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 1.0922330097087378,
|
|
"grad_norm": 0.4834432080334965,
|
|
"learning_rate": 3.962295281491674e-05,
|
|
"loss": 0.3608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39830780029296875,
|
|
"step": 675,
|
|
"valid_targets_mean": 8067.4,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 1.1003236245954693,
|
|
"grad_norm": 0.6282704766855602,
|
|
"learning_rate": 3.960719735334562e-05,
|
|
"loss": 0.3643,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32480061054229736,
|
|
"step": 680,
|
|
"valid_targets_mean": 7811.1,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 1.1084142394822007,
|
|
"grad_norm": 0.490852558536864,
|
|
"learning_rate": 3.959112267487314e-05,
|
|
"loss": 0.3459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3614788055419922,
|
|
"step": 685,
|
|
"valid_targets_mean": 7021.4,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 1.116504854368932,
|
|
"grad_norm": 0.47061983952914566,
|
|
"learning_rate": 3.957472904120467e-05,
|
|
"loss": 0.3433,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3086751103401184,
|
|
"step": 690,
|
|
"valid_targets_mean": 5997.2,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 1.1245954692556634,
|
|
"grad_norm": 0.44888571078972783,
|
|
"learning_rate": 3.955801671923837e-05,
|
|
"loss": 0.389,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3621148467063904,
|
|
"step": 695,
|
|
"valid_targets_mean": 7148.6,
|
|
"valid_targets_min": 4702
|
|
},
|
|
{
|
|
"epoch": 1.132686084142395,
|
|
"grad_norm": 0.56027022071704,
|
|
"learning_rate": 3.954098598106084e-05,
|
|
"loss": 0.3514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325370728969574,
|
|
"step": 700,
|
|
"valid_targets_mean": 7563.4,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 1.1407766990291262,
|
|
"grad_norm": 0.4720884795289638,
|
|
"learning_rate": 3.952363710394269e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32920920848846436,
|
|
"step": 705,
|
|
"valid_targets_mean": 7400.8,
|
|
"valid_targets_min": 2898
|
|
},
|
|
{
|
|
"epoch": 1.1488673139158576,
|
|
"grad_norm": 0.45290892818769596,
|
|
"learning_rate": 3.9505970370334015e-05,
|
|
"loss": 0.3353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32364755868911743,
|
|
"step": 710,
|
|
"valid_targets_mean": 6554.0,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 1.156957928802589,
|
|
"grad_norm": 0.4348077835242573,
|
|
"learning_rate": 3.94879860678598e-05,
|
|
"loss": 0.3591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35393470525741577,
|
|
"step": 715,
|
|
"valid_targets_mean": 8087.6,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 1.1650485436893203,
|
|
"grad_norm": 0.4227594099516516,
|
|
"learning_rate": 3.9469684489315256e-05,
|
|
"loss": 0.3497,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35017964243888855,
|
|
"step": 720,
|
|
"valid_targets_mean": 7483.6,
|
|
"valid_targets_min": 2526
|
|
},
|
|
{
|
|
"epoch": 1.1731391585760518,
|
|
"grad_norm": 0.4500296851620212,
|
|
"learning_rate": 3.945106593266102e-05,
|
|
"loss": 0.3508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3622860312461853,
|
|
"step": 725,
|
|
"valid_targets_mean": 7937.6,
|
|
"valid_targets_min": 2332
|
|
},
|
|
{
|
|
"epoch": 1.1812297734627832,
|
|
"grad_norm": 0.5414055066436431,
|
|
"learning_rate": 3.943213070101834e-05,
|
|
"loss": 0.3473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29072365164756775,
|
|
"step": 730,
|
|
"valid_targets_mean": 5858.5,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 1.1893203883495145,
|
|
"grad_norm": 0.481425269083926,
|
|
"learning_rate": 3.941287910266411e-05,
|
|
"loss": 0.3431,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29667848348617554,
|
|
"step": 735,
|
|
"valid_targets_mean": 6832.9,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 1.197411003236246,
|
|
"grad_norm": 0.5068660041921702,
|
|
"learning_rate": 3.9393311451025865e-05,
|
|
"loss": 0.3288,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37615975737571716,
|
|
"step": 740,
|
|
"valid_targets_mean": 7922.1,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 1.2055016181229774,
|
|
"grad_norm": 0.9147445359533504,
|
|
"learning_rate": 3.937342806467668e-05,
|
|
"loss": 0.3891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.48667970299720764,
|
|
"step": 745,
|
|
"valid_targets_mean": 3412.1,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 1.2135922330097086,
|
|
"grad_norm": 0.888516798970824,
|
|
"learning_rate": 3.935322926732998e-05,
|
|
"loss": 0.5244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.49741584062576294,
|
|
"step": 750,
|
|
"valid_targets_mean": 3334.9,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 1.22168284789644,
|
|
"grad_norm": 0.7818920652158852,
|
|
"learning_rate": 3.933271538783427e-05,
|
|
"loss": 0.5071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4418655037879944,
|
|
"step": 755,
|
|
"valid_targets_mean": 3261.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 1.2297734627831716,
|
|
"grad_norm": 0.7776487371115436,
|
|
"learning_rate": 3.931188676016777e-05,
|
|
"loss": 0.4713,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4955318570137024,
|
|
"step": 760,
|
|
"valid_targets_mean": 3196.9,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 1.237864077669903,
|
|
"grad_norm": 0.7930021342526781,
|
|
"learning_rate": 3.9290743723433e-05,
|
|
"loss": 0.5166,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5079530477523804,
|
|
"step": 765,
|
|
"valid_targets_mean": 3218.4,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 1.2459546925566343,
|
|
"grad_norm": 0.7909110791334379,
|
|
"learning_rate": 3.926928662185126e-05,
|
|
"loss": 0.491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4915957450866699,
|
|
"step": 770,
|
|
"valid_targets_mean": 3128.2,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 1.2540453074433657,
|
|
"grad_norm": 0.765823080609702,
|
|
"learning_rate": 3.924751580475698e-05,
|
|
"loss": 0.4924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.494957834482193,
|
|
"step": 775,
|
|
"valid_targets_mean": 3595.7,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 1.262135922330097,
|
|
"grad_norm": 1.045712855129931,
|
|
"learning_rate": 3.922543162659209e-05,
|
|
"loss": 0.486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.507395327091217,
|
|
"step": 780,
|
|
"valid_targets_mean": 2384.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 1.2702265372168284,
|
|
"grad_norm": 0.8850101405229774,
|
|
"learning_rate": 3.9203034446900224e-05,
|
|
"loss": 0.4854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.507400393486023,
|
|
"step": 785,
|
|
"valid_targets_mean": 2044.2,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 1.27831715210356,
|
|
"grad_norm": 0.7931562943412704,
|
|
"learning_rate": 3.918032463032086e-05,
|
|
"loss": 0.478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.521835446357727,
|
|
"step": 790,
|
|
"valid_targets_mean": 2718.1,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 1.2864077669902914,
|
|
"grad_norm": 0.723093300351076,
|
|
"learning_rate": 3.9157302546583406e-05,
|
|
"loss": 0.485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4650059938430786,
|
|
"step": 795,
|
|
"valid_targets_mean": 3258.4,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 1.2944983818770226,
|
|
"grad_norm": 0.7343947043384617,
|
|
"learning_rate": 3.913396857050115e-05,
|
|
"loss": 0.4742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4760621190071106,
|
|
"step": 800,
|
|
"valid_targets_mean": 3150.2,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 1.302588996763754,
|
|
"grad_norm": 0.7778238888894733,
|
|
"learning_rate": 3.911032308196518e-05,
|
|
"loss": 0.4759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4637409448623657,
|
|
"step": 805,
|
|
"valid_targets_mean": 2876.9,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 1.3106796116504853,
|
|
"grad_norm": 0.6288806794338688,
|
|
"learning_rate": 3.9086366465938194e-05,
|
|
"loss": 0.4464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42423275113105774,
|
|
"step": 810,
|
|
"valid_targets_mean": 4079.6,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 1.3187702265372168,
|
|
"grad_norm": 0.6926348687095119,
|
|
"learning_rate": 3.906209911244823e-05,
|
|
"loss": 0.438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4340791702270508,
|
|
"step": 815,
|
|
"valid_targets_mean": 3415.6,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 1.3268608414239482,
|
|
"grad_norm": 0.726835789472572,
|
|
"learning_rate": 3.903752141658232e-05,
|
|
"loss": 0.4684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.44731298089027405,
|
|
"step": 820,
|
|
"valid_targets_mean": 3181.6,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 1.3349514563106797,
|
|
"grad_norm": 0.789554550375042,
|
|
"learning_rate": 3.9012633778480054e-05,
|
|
"loss": 0.4608,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4829597473144531,
|
|
"step": 825,
|
|
"valid_targets_mean": 3121.9,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 1.343042071197411,
|
|
"grad_norm": 0.6713483819715406,
|
|
"learning_rate": 3.8987436603327064e-05,
|
|
"loss": 0.458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47119760513305664,
|
|
"step": 830,
|
|
"valid_targets_mean": 3586.2,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 1.3511326860841424,
|
|
"grad_norm": 0.7427556747463112,
|
|
"learning_rate": 3.896193030134844e-05,
|
|
"loss": 0.4479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4320055842399597,
|
|
"step": 835,
|
|
"valid_targets_mean": 2831.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 1.3592233009708738,
|
|
"grad_norm": 0.722101202230116,
|
|
"learning_rate": 3.893611528780204e-05,
|
|
"loss": 0.4705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4457879662513733,
|
|
"step": 840,
|
|
"valid_targets_mean": 3350.7,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 1.367313915857605,
|
|
"grad_norm": 0.6930675610937002,
|
|
"learning_rate": 3.890999198297171e-05,
|
|
"loss": 0.4597,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41945701837539673,
|
|
"step": 845,
|
|
"valid_targets_mean": 3731.3,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 1.3754045307443366,
|
|
"grad_norm": 0.8212062520721645,
|
|
"learning_rate": 3.888356081216049e-05,
|
|
"loss": 0.4658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.520559549331665,
|
|
"step": 850,
|
|
"valid_targets_mean": 2778.6,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 1.383495145631068,
|
|
"grad_norm": 0.9671309453773359,
|
|
"learning_rate": 3.8856822205683646e-05,
|
|
"loss": 0.4742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.5014246702194214,
|
|
"step": 855,
|
|
"valid_targets_mean": 2516.2,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 1.3915857605177995,
|
|
"grad_norm": 0.7705302765883373,
|
|
"learning_rate": 3.882977659886169e-05,
|
|
"loss": 0.4759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4699862003326416,
|
|
"step": 860,
|
|
"valid_targets_mean": 2672.7,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 1.3996763754045307,
|
|
"grad_norm": 0.7439341275062549,
|
|
"learning_rate": 3.8802424432013283e-05,
|
|
"loss": 0.4733,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.47823020815849304,
|
|
"step": 865,
|
|
"valid_targets_mean": 3144.7,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 1.4077669902912622,
|
|
"grad_norm": 0.8048849865194836,
|
|
"learning_rate": 3.8774766150448055e-05,
|
|
"loss": 0.4449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4707701802253723,
|
|
"step": 870,
|
|
"valid_targets_mean": 2757.4,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 1.4158576051779934,
|
|
"grad_norm": 0.7629841202120125,
|
|
"learning_rate": 3.874680220445938e-05,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4569883346557617,
|
|
"step": 875,
|
|
"valid_targets_mean": 3186.5,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 1.4239482200647249,
|
|
"grad_norm": 0.7580278084324503,
|
|
"learning_rate": 3.871853304931701e-05,
|
|
"loss": 0.4673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4785604178905487,
|
|
"step": 880,
|
|
"valid_targets_mean": 2945.1,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 1.4320388349514563,
|
|
"grad_norm": 0.7573866517699648,
|
|
"learning_rate": 3.8689959145259725e-05,
|
|
"loss": 0.4502,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4420326352119446,
|
|
"step": 885,
|
|
"valid_targets_mean": 2664.8,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 1.4401294498381878,
|
|
"grad_norm": 0.7250738746302343,
|
|
"learning_rate": 3.866108095748776e-05,
|
|
"loss": 0.4507,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4741765856742859,
|
|
"step": 890,
|
|
"valid_targets_mean": 3277.8,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 1.448220064724919,
|
|
"grad_norm": 0.6758182045981384,
|
|
"learning_rate": 3.863189895615529e-05,
|
|
"loss": 0.4262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3965120017528534,
|
|
"step": 895,
|
|
"valid_targets_mean": 3435.2,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 1.4563106796116505,
|
|
"grad_norm": 0.7611603876330763,
|
|
"learning_rate": 3.8602413616362745e-05,
|
|
"loss": 0.4358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39768022298812866,
|
|
"step": 900,
|
|
"valid_targets_mean": 4198.9,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 1.4644012944983817,
|
|
"grad_norm": 0.6247879749803259,
|
|
"learning_rate": 3.85726254181491e-05,
|
|
"loss": 0.4136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42923057079315186,
|
|
"step": 905,
|
|
"valid_targets_mean": 4307.2,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 1.4724919093851132,
|
|
"grad_norm": 0.590220922731002,
|
|
"learning_rate": 3.8542534846484055e-05,
|
|
"loss": 0.4118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38480812311172485,
|
|
"step": 910,
|
|
"valid_targets_mean": 4283.3,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 1.4805825242718447,
|
|
"grad_norm": 0.6691179747584267,
|
|
"learning_rate": 3.8512142391260085e-05,
|
|
"loss": 0.4168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4205031394958496,
|
|
"step": 915,
|
|
"valid_targets_mean": 4556.1,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 1.4886731391585761,
|
|
"grad_norm": 0.6291998089233802,
|
|
"learning_rate": 3.848144854728457e-05,
|
|
"loss": 0.3915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3751159906387329,
|
|
"step": 920,
|
|
"valid_targets_mean": 3447.2,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 1.4967637540453074,
|
|
"grad_norm": 0.6968952350014663,
|
|
"learning_rate": 3.8450453814271636e-05,
|
|
"loss": 0.4098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4444117546081543,
|
|
"step": 925,
|
|
"valid_targets_mean": 4328.6,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 1.5048543689320388,
|
|
"grad_norm": 0.6409550468842742,
|
|
"learning_rate": 3.8419158696834075e-05,
|
|
"loss": 0.4407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4060596227645874,
|
|
"step": 930,
|
|
"valid_targets_mean": 3835.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 1.51294498381877,
|
|
"grad_norm": 0.6424363352174571,
|
|
"learning_rate": 3.8387563704475124e-05,
|
|
"loss": 0.418,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3603544533252716,
|
|
"step": 935,
|
|
"valid_targets_mean": 3986.9,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 1.5210355987055015,
|
|
"grad_norm": 0.5995176367642314,
|
|
"learning_rate": 3.835566935158015e-05,
|
|
"loss": 0.3899,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3868566155433655,
|
|
"step": 940,
|
|
"valid_targets_mean": 4109.8,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 1.529126213592233,
|
|
"grad_norm": 0.6241850671944227,
|
|
"learning_rate": 3.8323476157408315e-05,
|
|
"loss": 0.3986,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4304489493370056,
|
|
"step": 945,
|
|
"valid_targets_mean": 4168.6,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 1.5372168284789645,
|
|
"grad_norm": 1.040760910988651,
|
|
"learning_rate": 3.8290984646084056e-05,
|
|
"loss": 0.4085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3906440734863281,
|
|
"step": 950,
|
|
"valid_targets_mean": 4216.5,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 1.545307443365696,
|
|
"grad_norm": 0.6519945827789969,
|
|
"learning_rate": 3.825819534658862e-05,
|
|
"loss": 0.3955,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4096558094024658,
|
|
"step": 955,
|
|
"valid_targets_mean": 3781.9,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 1.5533980582524272,
|
|
"grad_norm": 0.6479752533944285,
|
|
"learning_rate": 3.822510879275142e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37997305393218994,
|
|
"step": 960,
|
|
"valid_targets_mean": 3348.1,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 1.5614886731391586,
|
|
"grad_norm": 0.588181029686509,
|
|
"learning_rate": 3.8191725523241346e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4473213851451874,
|
|
"step": 965,
|
|
"valid_targets_mean": 4481.9,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 1.5695792880258899,
|
|
"grad_norm": 0.6714890977266147,
|
|
"learning_rate": 3.8158046081557986e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4482303559780121,
|
|
"step": 970,
|
|
"valid_targets_mean": 3422.4,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 1.5776699029126213,
|
|
"grad_norm": 0.6060732511857289,
|
|
"learning_rate": 3.812407101602281e-05,
|
|
"loss": 0.3895,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3715343773365021,
|
|
"step": 975,
|
|
"valid_targets_mean": 3936.9,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 1.5857605177993528,
|
|
"grad_norm": 0.6001068677656148,
|
|
"learning_rate": 3.808980087977019e-05,
|
|
"loss": 0.3818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37591198086738586,
|
|
"step": 980,
|
|
"valid_targets_mean": 3986.6,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 1.5938511326860842,
|
|
"grad_norm": 0.7000946822381661,
|
|
"learning_rate": 3.8055236230738456e-05,
|
|
"loss": 0.3691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40361475944519043,
|
|
"step": 985,
|
|
"valid_targets_mean": 3311.6,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 1.6019417475728155,
|
|
"grad_norm": 0.5788320307984678,
|
|
"learning_rate": 3.8020377631660756e-05,
|
|
"loss": 0.4246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3535193204879761,
|
|
"step": 990,
|
|
"valid_targets_mean": 4680.1,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 1.610032362459547,
|
|
"grad_norm": 0.6639375139516203,
|
|
"learning_rate": 3.7985225650055956e-05,
|
|
"loss": 0.3949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4067763686180115,
|
|
"step": 995,
|
|
"valid_targets_mean": 4138.6,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 1.6181229773462782,
|
|
"grad_norm": 0.6129257993343638,
|
|
"learning_rate": 3.794978085821933e-05,
|
|
"loss": 0.3963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3734704852104187,
|
|
"step": 1000,
|
|
"valid_targets_mean": 3797.0,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 1.6262135922330097,
|
|
"grad_norm": 0.586309978554014,
|
|
"learning_rate": 3.7914043833213294e-05,
|
|
"loss": 0.4079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4114878475666046,
|
|
"step": 1005,
|
|
"valid_targets_mean": 4714.8,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 1.6343042071197411,
|
|
"grad_norm": 0.7412145341429787,
|
|
"learning_rate": 3.787801515685799e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42866048216819763,
|
|
"step": 1010,
|
|
"valid_targets_mean": 3223.0,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 1.6423948220064726,
|
|
"grad_norm": 0.6298901268481449,
|
|
"learning_rate": 3.7841695415721826e-05,
|
|
"loss": 0.3723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38183513283729553,
|
|
"step": 1015,
|
|
"valid_targets_mean": 3943.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 1.650485436893204,
|
|
"grad_norm": 0.6176784490755091,
|
|
"learning_rate": 3.78050852011119e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33833619952201843,
|
|
"step": 1020,
|
|
"valid_targets_mean": 4954.7,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 1.6585760517799353,
|
|
"grad_norm": 0.6087099001099794,
|
|
"learning_rate": 3.776818510906442e-05,
|
|
"loss": 0.3708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3340659737586975,
|
|
"step": 1025,
|
|
"valid_targets_mean": 3676.8,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.5668591607843806,
|
|
"learning_rate": 3.773099574033495e-05,
|
|
"loss": 0.3784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34908145666122437,
|
|
"step": 1030,
|
|
"valid_targets_mean": 4621.4,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 1.674757281553398,
|
|
"grad_norm": 0.6811988218257936,
|
|
"learning_rate": 3.7693517700388655e-05,
|
|
"loss": 0.3617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33428844809532166,
|
|
"step": 1035,
|
|
"valid_targets_mean": 3523.9,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 1.6828478964401294,
|
|
"grad_norm": 0.661166060785201,
|
|
"learning_rate": 3.765575159939045e-05,
|
|
"loss": 0.3828,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38519763946533203,
|
|
"step": 1040,
|
|
"valid_targets_mean": 3767.3,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 1.690938511326861,
|
|
"grad_norm": 0.635519786642459,
|
|
"learning_rate": 3.761769805219505e-05,
|
|
"loss": 0.3882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3758396506309509,
|
|
"step": 1045,
|
|
"valid_targets_mean": 4240.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 1.6990291262135924,
|
|
"grad_norm": 0.5795362932404822,
|
|
"learning_rate": 3.757935767833696e-05,
|
|
"loss": 0.3795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.406206339597702,
|
|
"step": 1050,
|
|
"valid_targets_mean": 5330.5,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 1.7071197411003236,
|
|
"grad_norm": 0.5983057188423216,
|
|
"learning_rate": 3.7540731102020405e-05,
|
|
"loss": 0.3742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3096472918987274,
|
|
"step": 1055,
|
|
"valid_targets_mean": 3586.9,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 1.715210355987055,
|
|
"grad_norm": 0.6829004913654826,
|
|
"learning_rate": 3.7501818952109156e-05,
|
|
"loss": 0.362,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3755829334259033,
|
|
"step": 1060,
|
|
"valid_targets_mean": 3200.2,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 1.7233009708737863,
|
|
"grad_norm": 0.6663840962866384,
|
|
"learning_rate": 3.746262186211629e-05,
|
|
"loss": 0.3735,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4185965955257416,
|
|
"step": 1065,
|
|
"valid_targets_mean": 3854.4,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 1.7313915857605178,
|
|
"grad_norm": 1.678927068242607,
|
|
"learning_rate": 3.742314047019386e-05,
|
|
"loss": 0.383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4099944829940796,
|
|
"step": 1070,
|
|
"valid_targets_mean": 2972.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 1.7394822006472492,
|
|
"grad_norm": 0.6533292095832952,
|
|
"learning_rate": 3.7383375419122565e-05,
|
|
"loss": 0.3794,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3951898515224457,
|
|
"step": 1075,
|
|
"valid_targets_mean": 3447.4,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 1.7475728155339807,
|
|
"grad_norm": 0.6963710269510751,
|
|
"learning_rate": 3.734332735630121e-05,
|
|
"loss": 0.3781,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39096444845199585,
|
|
"step": 1080,
|
|
"valid_targets_mean": 3488.9,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 1.755663430420712,
|
|
"grad_norm": 0.4540955335898916,
|
|
"learning_rate": 3.730299693373622e-05,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24774682521820068,
|
|
"step": 1085,
|
|
"valid_targets_mean": 6022.9,
|
|
"valid_targets_min": 2384
|
|
},
|
|
{
|
|
"epoch": 1.7637540453074434,
|
|
"grad_norm": 0.42684160855409603,
|
|
"learning_rate": 3.7262384808031004e-05,
|
|
"loss": 0.2563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24269774556159973,
|
|
"step": 1090,
|
|
"valid_targets_mean": 6773.4,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 1.7718446601941746,
|
|
"grad_norm": 0.49895237577685475,
|
|
"learning_rate": 3.722149164037525e-05,
|
|
"loss": 0.2542,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23894259333610535,
|
|
"step": 1095,
|
|
"valid_targets_mean": 5853.4,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 1.779935275080906,
|
|
"grad_norm": 0.6262888400448398,
|
|
"learning_rate": 3.718031809653419e-05,
|
|
"loss": 0.2449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23444335162639618,
|
|
"step": 1100,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 1.7880258899676376,
|
|
"grad_norm": 0.5207075255554792,
|
|
"learning_rate": 3.713886484683776e-05,
|
|
"loss": 0.2566,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157619595527649,
|
|
"step": 1105,
|
|
"valid_targets_mean": 4999.9,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 1.796116504854369,
|
|
"grad_norm": 0.515214744226938,
|
|
"learning_rate": 3.7097132566169644e-05,
|
|
"loss": 0.2352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22869645059108734,
|
|
"step": 1110,
|
|
"valid_targets_mean": 5886.6,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 1.8042071197411005,
|
|
"grad_norm": 0.5213961417814614,
|
|
"learning_rate": 3.7055121933956344e-05,
|
|
"loss": 0.2405,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22981508076190948,
|
|
"step": 1115,
|
|
"valid_targets_mean": 5545.2,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 1.8122977346278317,
|
|
"grad_norm": 0.5011582379645335,
|
|
"learning_rate": 3.7012833634156074e-05,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22520172595977783,
|
|
"step": 1120,
|
|
"valid_targets_mean": 5221.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 1.820388349514563,
|
|
"grad_norm": 0.433787558559027,
|
|
"learning_rate": 3.6970268355247664e-05,
|
|
"loss": 0.2531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21840864419937134,
|
|
"step": 1125,
|
|
"valid_targets_mean": 6906.4,
|
|
"valid_targets_min": 3688
|
|
},
|
|
{
|
|
"epoch": 1.8284789644012944,
|
|
"grad_norm": 0.4461456898907489,
|
|
"learning_rate": 3.6927426790219296e-05,
|
|
"loss": 0.2396,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2790132164955139,
|
|
"step": 1130,
|
|
"valid_targets_mean": 5886.7,
|
|
"valid_targets_min": 3248
|
|
},
|
|
{
|
|
"epoch": 1.8365695792880259,
|
|
"grad_norm": 0.4450170159382012,
|
|
"learning_rate": 3.6884309636557294e-05,
|
|
"loss": 0.2286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23092791438102722,
|
|
"step": 1135,
|
|
"valid_targets_mean": 5565.2,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 1.8446601941747574,
|
|
"grad_norm": 0.5118163182829762,
|
|
"learning_rate": 3.6840917596234706e-05,
|
|
"loss": 0.245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.270181804895401,
|
|
"step": 1140,
|
|
"valid_targets_mean": 4495.2,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 1.8527508090614888,
|
|
"grad_norm": 0.5116271433775103,
|
|
"learning_rate": 3.67972513756999e-05,
|
|
"loss": 0.2605,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24221359193325043,
|
|
"step": 1145,
|
|
"valid_targets_mean": 5532.5,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 1.86084142394822,
|
|
"grad_norm": 0.5179137281679843,
|
|
"learning_rate": 3.675331168586507e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2840508222579956,
|
|
"step": 1150,
|
|
"valid_targets_mean": 6345.2,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 1.8689320388349513,
|
|
"grad_norm": 0.45633158484470104,
|
|
"learning_rate": 3.670909924209464e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22190481424331665,
|
|
"step": 1155,
|
|
"valid_targets_mean": 6782.8,
|
|
"valid_targets_min": 4896
|
|
},
|
|
{
|
|
"epoch": 1.8770226537216828,
|
|
"grad_norm": 0.47235383732384373,
|
|
"learning_rate": 3.666461476419366e-05,
|
|
"loss": 0.234,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19451837241649628,
|
|
"step": 1160,
|
|
"valid_targets_mean": 4796.1,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 1.8851132686084142,
|
|
"grad_norm": 0.4626112416545145,
|
|
"learning_rate": 3.6619858976396024e-05,
|
|
"loss": 0.2539,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2689831554889679,
|
|
"step": 1165,
|
|
"valid_targets_mean": 6176.4,
|
|
"valid_targets_min": 2462
|
|
},
|
|
{
|
|
"epoch": 1.8932038834951457,
|
|
"grad_norm": 0.5391505583467975,
|
|
"learning_rate": 3.657483260735274e-05,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3605060577392578,
|
|
"step": 1170,
|
|
"valid_targets_mean": 5792.8,
|
|
"valid_targets_min": 3198
|
|
},
|
|
{
|
|
"epoch": 1.9012944983818771,
|
|
"grad_norm": 0.45599119133953253,
|
|
"learning_rate": 3.652953639012001e-05,
|
|
"loss": 0.2381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21034517884254456,
|
|
"step": 1175,
|
|
"valid_targets_mean": 6734.0,
|
|
"valid_targets_min": 3363
|
|
},
|
|
{
|
|
"epoch": 1.9093851132686084,
|
|
"grad_norm": 0.45662533458050836,
|
|
"learning_rate": 3.648397106214737e-05,
|
|
"loss": 0.2376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21296310424804688,
|
|
"step": 1180,
|
|
"valid_targets_mean": 6156.2,
|
|
"valid_targets_min": 3554
|
|
},
|
|
{
|
|
"epoch": 1.9174757281553398,
|
|
"grad_norm": 0.4433638656237555,
|
|
"learning_rate": 3.6438137365265605e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25976046919822693,
|
|
"step": 1185,
|
|
"valid_targets_mean": 6275.4,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 1.925566343042071,
|
|
"grad_norm": 0.4679879684777045,
|
|
"learning_rate": 3.639203604567471e-05,
|
|
"loss": 0.2203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24510705471038818,
|
|
"step": 1190,
|
|
"valid_targets_mean": 6298.0,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 1.9336569579288025,
|
|
"grad_norm": 0.4977336720705982,
|
|
"learning_rate": 3.634566785393175e-05,
|
|
"loss": 0.2461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31283706426620483,
|
|
"step": 1195,
|
|
"valid_targets_mean": 6639.6,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 1.941747572815534,
|
|
"grad_norm": 0.5202866468820287,
|
|
"learning_rate": 3.629903354493863e-05,
|
|
"loss": 0.2336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23550382256507874,
|
|
"step": 1200,
|
|
"valid_targets_mean": 5588.2,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 1.9498381877022655,
|
|
"grad_norm": 0.5219947278827387,
|
|
"learning_rate": 3.625213387792979e-05,
|
|
"loss": 0.2171,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2246617078781128,
|
|
"step": 1205,
|
|
"valid_targets_mean": 5546.2,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 1.9579288025889967,
|
|
"grad_norm": 0.46448093815593106,
|
|
"learning_rate": 3.620496961645986e-05,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23543399572372437,
|
|
"step": 1210,
|
|
"valid_targets_mean": 5383.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 1.9660194174757282,
|
|
"grad_norm": 0.5060914468823391,
|
|
"learning_rate": 3.615754152839122e-05,
|
|
"loss": 0.2466,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29016217589378357,
|
|
"step": 1215,
|
|
"valid_targets_mean": 5866.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 1.9741100323624594,
|
|
"grad_norm": 0.47924017927514234,
|
|
"learning_rate": 3.610985038588153e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23586151003837585,
|
|
"step": 1220,
|
|
"valid_targets_mean": 5499.6,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 1.9822006472491909,
|
|
"grad_norm": 0.4857460079460246,
|
|
"learning_rate": 3.606189696537112e-05,
|
|
"loss": 0.2419,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2761743664741516,
|
|
"step": 1225,
|
|
"valid_targets_mean": 6634.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 1.9902912621359223,
|
|
"grad_norm": 0.4621192062943124,
|
|
"learning_rate": 3.601368204757034e-05,
|
|
"loss": 0.2446,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2509958744049072,
|
|
"step": 1230,
|
|
"valid_targets_mean": 5642.3,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 1.9983818770226538,
|
|
"grad_norm": 0.504285712583757,
|
|
"learning_rate": 3.596520641744691e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23630420863628387,
|
|
"step": 1235,
|
|
"valid_targets_mean": 5486.1,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 2.0064724919093853,
|
|
"grad_norm": 0.6014265194583701,
|
|
"learning_rate": 3.591647086421308e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2990904748439789,
|
|
"step": 1240,
|
|
"valid_targets_mean": 4996.2,
|
|
"valid_targets_min": 536
|
|
},
|
|
{
|
|
"epoch": 2.0145631067961167,
|
|
"grad_norm": 0.4549243639276278,
|
|
"learning_rate": 3.586747618131281e-05,
|
|
"loss": 0.3382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3390650749206543,
|
|
"step": 1245,
|
|
"valid_targets_mean": 8058.4,
|
|
"valid_targets_min": 3475
|
|
},
|
|
{
|
|
"epoch": 2.0226537216828477,
|
|
"grad_norm": 0.4749318683485237,
|
|
"learning_rate": 3.581822316640884e-05,
|
|
"loss": 0.332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3646315932273865,
|
|
"step": 1250,
|
|
"valid_targets_mean": 7721.6,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 2.030744336569579,
|
|
"grad_norm": 0.5059476480776492,
|
|
"learning_rate": 3.5768712621369724e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3097308576107025,
|
|
"step": 1255,
|
|
"valid_targets_mean": 6325.1,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 2.0388349514563107,
|
|
"grad_norm": 0.43567691552176574,
|
|
"learning_rate": 3.571894535225674e-05,
|
|
"loss": 0.311,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29119089245796204,
|
|
"step": 1260,
|
|
"valid_targets_mean": 7313.6,
|
|
"valid_targets_min": 2706
|
|
},
|
|
{
|
|
"epoch": 2.046925566343042,
|
|
"grad_norm": 0.9614339545477306,
|
|
"learning_rate": 3.566892216931081e-05,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38573122024536133,
|
|
"step": 1265,
|
|
"valid_targets_mean": 8262.7,
|
|
"valid_targets_min": 2505
|
|
},
|
|
{
|
|
"epoch": 2.0550161812297736,
|
|
"grad_norm": 0.6941665522585272,
|
|
"learning_rate": 3.561864388693926e-05,
|
|
"loss": 0.3249,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28548356890678406,
|
|
"step": 1270,
|
|
"valid_targets_mean": 6143.1,
|
|
"valid_targets_min": 611
|
|
},
|
|
{
|
|
"epoch": 2.063106796116505,
|
|
"grad_norm": 0.42092885788585327,
|
|
"learning_rate": 3.556811132370261e-05,
|
|
"loss": 0.3297,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.306254506111145,
|
|
"step": 1275,
|
|
"valid_targets_mean": 6580.4,
|
|
"valid_targets_min": 3909
|
|
},
|
|
{
|
|
"epoch": 2.071197411003236,
|
|
"grad_norm": 0.48148221197231267,
|
|
"learning_rate": 3.551732530230123e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2623848021030426,
|
|
"step": 1280,
|
|
"valid_targets_mean": 6454.7,
|
|
"valid_targets_min": 2205
|
|
},
|
|
{
|
|
"epoch": 2.0792880258899675,
|
|
"grad_norm": 0.45157690368933484,
|
|
"learning_rate": 3.54662866495619e-05,
|
|
"loss": 0.3178,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33691227436065674,
|
|
"step": 1285,
|
|
"valid_targets_mean": 7295.4,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 2.087378640776699,
|
|
"grad_norm": 0.44525824358048083,
|
|
"learning_rate": 3.5414996196424435e-05,
|
|
"loss": 0.3173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31353265047073364,
|
|
"step": 1290,
|
|
"valid_targets_mean": 7182.0,
|
|
"valid_targets_min": 1873
|
|
},
|
|
{
|
|
"epoch": 2.0954692556634305,
|
|
"grad_norm": 0.4644732991993433,
|
|
"learning_rate": 3.536345477792809e-05,
|
|
"loss": 0.3422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3179835081100464,
|
|
"step": 1295,
|
|
"valid_targets_mean": 7481.9,
|
|
"valid_targets_min": 3065
|
|
},
|
|
{
|
|
"epoch": 2.103559870550162,
|
|
"grad_norm": 0.4519791055149053,
|
|
"learning_rate": 3.5311663233197984e-05,
|
|
"loss": 0.3272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246445655822754,
|
|
"step": 1300,
|
|
"valid_targets_mean": 7622.2,
|
|
"valid_targets_min": 3085
|
|
},
|
|
{
|
|
"epoch": 2.1116504854368934,
|
|
"grad_norm": 0.4786819308529633,
|
|
"learning_rate": 3.525962240543144e-05,
|
|
"loss": 0.3037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3294726610183716,
|
|
"step": 1305,
|
|
"valid_targets_mean": 6445.4,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 2.1197411003236244,
|
|
"grad_norm": 0.4554086419738832,
|
|
"learning_rate": 3.520733314188429e-05,
|
|
"loss": 0.3271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31785595417022705,
|
|
"step": 1310,
|
|
"valid_targets_mean": 6752.0,
|
|
"valid_targets_min": 2568
|
|
},
|
|
{
|
|
"epoch": 2.127831715210356,
|
|
"grad_norm": 0.45953570374734476,
|
|
"learning_rate": 3.515479629385699e-05,
|
|
"loss": 0.3514,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3112117648124695,
|
|
"step": 1315,
|
|
"valid_targets_mean": 7377.4,
|
|
"valid_targets_min": 3797
|
|
},
|
|
{
|
|
"epoch": 2.1359223300970873,
|
|
"grad_norm": 0.43107122770104195,
|
|
"learning_rate": 3.5102012716680875e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29922381043434143,
|
|
"step": 1320,
|
|
"valid_targets_mean": 7377.4,
|
|
"valid_targets_min": 3726
|
|
},
|
|
{
|
|
"epoch": 2.144012944983819,
|
|
"grad_norm": 0.4890528047808735,
|
|
"learning_rate": 3.504898326970414e-05,
|
|
"loss": 0.2913,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28039124608039856,
|
|
"step": 1325,
|
|
"valid_targets_mean": 6884.6,
|
|
"valid_targets_min": 695
|
|
},
|
|
{
|
|
"epoch": 2.1521035598705502,
|
|
"grad_norm": 0.4492257868444859,
|
|
"learning_rate": 3.499570881627791e-05,
|
|
"loss": 0.3286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31394723057746887,
|
|
"step": 1330,
|
|
"valid_targets_mean": 7725.1,
|
|
"valid_targets_min": 3973
|
|
},
|
|
{
|
|
"epoch": 2.1601941747572817,
|
|
"grad_norm": 0.45196170093359594,
|
|
"learning_rate": 3.494219022374215e-05,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36955010890960693,
|
|
"step": 1335,
|
|
"valid_targets_mean": 7827.6,
|
|
"valid_targets_min": 3936
|
|
},
|
|
{
|
|
"epoch": 2.168284789644013,
|
|
"grad_norm": 0.42892869442565384,
|
|
"learning_rate": 3.4888428363411564e-05,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33320367336273193,
|
|
"step": 1340,
|
|
"valid_targets_mean": 8419.2,
|
|
"valid_targets_min": 5385
|
|
},
|
|
{
|
|
"epoch": 2.176375404530744,
|
|
"grad_norm": 0.45744113715758045,
|
|
"learning_rate": 3.48344241105614e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35591331124305725,
|
|
"step": 1345,
|
|
"valid_targets_mean": 8293.9,
|
|
"valid_targets_min": 3945
|
|
},
|
|
{
|
|
"epoch": 2.1844660194174756,
|
|
"grad_norm": 0.42030711419530997,
|
|
"learning_rate": 3.478017834441319e-05,
|
|
"loss": 0.3098,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29616591334342957,
|
|
"step": 1350,
|
|
"valid_targets_mean": 7622.2,
|
|
"valid_targets_min": 3170
|
|
},
|
|
{
|
|
"epoch": 2.192556634304207,
|
|
"grad_norm": 0.42933777850934934,
|
|
"learning_rate": 3.472569194812045e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29085344076156616,
|
|
"step": 1355,
|
|
"valid_targets_mean": 7071.5,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 2.2006472491909386,
|
|
"grad_norm": 0.4822656065504384,
|
|
"learning_rate": 3.467096580875432e-05,
|
|
"loss": 0.3222,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3630227744579315,
|
|
"step": 1360,
|
|
"valid_targets_mean": 7871.8,
|
|
"valid_targets_min": 4330
|
|
},
|
|
{
|
|
"epoch": 2.20873786407767,
|
|
"grad_norm": 0.7805285602686421,
|
|
"learning_rate": 3.4616000817289076e-05,
|
|
"loss": 0.4057,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.454883337020874,
|
|
"step": 1365,
|
|
"valid_targets_mean": 2993.9,
|
|
"valid_targets_min": 1430
|
|
},
|
|
{
|
|
"epoch": 2.2168284789644015,
|
|
"grad_norm": 0.7805602334689248,
|
|
"learning_rate": 3.456079786858766e-05,
|
|
"loss": 0.4537,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.45851534605026245,
|
|
"step": 1370,
|
|
"valid_targets_mean": 3060.9,
|
|
"valid_targets_min": 1119
|
|
},
|
|
{
|
|
"epoch": 2.2249190938511325,
|
|
"grad_norm": 0.8369096908438444,
|
|
"learning_rate": 3.450535786138709e-05,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42663732171058655,
|
|
"step": 1375,
|
|
"valid_targets_mean": 2852.3,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 2.233009708737864,
|
|
"grad_norm": 0.7602674447866041,
|
|
"learning_rate": 3.4449681698283856e-05,
|
|
"loss": 0.4264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4672640264034271,
|
|
"step": 1380,
|
|
"valid_targets_mean": 3322.4,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 2.2411003236245954,
|
|
"grad_norm": 0.7653932780247311,
|
|
"learning_rate": 3.4393770285719196e-05,
|
|
"loss": 0.4421,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4281241297721863,
|
|
"step": 1385,
|
|
"valid_targets_mean": 3253.7,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.249190938511327,
|
|
"grad_norm": 0.8296647531841562,
|
|
"learning_rate": 3.433762453396434e-05,
|
|
"loss": 0.4413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4567793905735016,
|
|
"step": 1390,
|
|
"valid_targets_mean": 2743.4,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 2.2572815533980584,
|
|
"grad_norm": 0.6605358764748289,
|
|
"learning_rate": 3.4281245357105726e-05,
|
|
"loss": 0.4494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4331215023994446,
|
|
"step": 1395,
|
|
"valid_targets_mean": 4347.8,
|
|
"valid_targets_min": 1044
|
|
},
|
|
{
|
|
"epoch": 2.26537216828479,
|
|
"grad_norm": 0.7525371302324368,
|
|
"learning_rate": 3.4224633673030074e-05,
|
|
"loss": 0.4242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.46705174446105957,
|
|
"step": 1400,
|
|
"valid_targets_mean": 3000.2,
|
|
"valid_targets_min": 1293
|
|
},
|
|
{
|
|
"epoch": 2.273462783171521,
|
|
"grad_norm": 0.828408984869813,
|
|
"learning_rate": 3.416779040340945e-05,
|
|
"loss": 0.4101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41368424892425537,
|
|
"step": 1405,
|
|
"valid_targets_mean": 3091.8,
|
|
"valid_targets_min": 1429
|
|
},
|
|
{
|
|
"epoch": 2.2815533980582523,
|
|
"grad_norm": 0.8731185162707,
|
|
"learning_rate": 3.4110716473686305e-05,
|
|
"loss": 0.4412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4121938943862915,
|
|
"step": 1410,
|
|
"valid_targets_mean": 2893.9,
|
|
"valid_targets_min": 1170
|
|
},
|
|
{
|
|
"epoch": 2.2896440129449838,
|
|
"grad_norm": 0.6499592039768247,
|
|
"learning_rate": 3.405341281305835e-05,
|
|
"loss": 0.4151,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4045106768608093,
|
|
"step": 1415,
|
|
"valid_targets_mean": 3624.3,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 2.2977346278317152,
|
|
"grad_norm": 0.8289152009164938,
|
|
"learning_rate": 3.3995880354463444e-05,
|
|
"loss": 0.4327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4141398072242737,
|
|
"step": 1420,
|
|
"valid_targets_mean": 2964.5,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 2.3058252427184467,
|
|
"grad_norm": 0.9528691187210049,
|
|
"learning_rate": 3.393812003456444e-05,
|
|
"loss": 0.414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41642385721206665,
|
|
"step": 1425,
|
|
"valid_targets_mean": 3237.4,
|
|
"valid_targets_min": 1275
|
|
},
|
|
{
|
|
"epoch": 2.313915857605178,
|
|
"grad_norm": 0.7080266433968921,
|
|
"learning_rate": 3.3880132793733896e-05,
|
|
"loss": 0.3973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4032875895500183,
|
|
"step": 1430,
|
|
"valid_targets_mean": 2979.6,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 2.3220064724919096,
|
|
"grad_norm": 0.6952529365208576,
|
|
"learning_rate": 3.382191957603878e-05,
|
|
"loss": 0.3952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4107402563095093,
|
|
"step": 1435,
|
|
"valid_targets_mean": 3230.4,
|
|
"valid_targets_min": 1244
|
|
},
|
|
{
|
|
"epoch": 2.3300970873786406,
|
|
"grad_norm": 0.7272498923580895,
|
|
"learning_rate": 3.376348132922509e-05,
|
|
"loss": 0.4134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4044322073459625,
|
|
"step": 1440,
|
|
"valid_targets_mean": 2864.6,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 2.338187702265372,
|
|
"grad_norm": 0.6959547439645711,
|
|
"learning_rate": 3.3704819004702444e-05,
|
|
"loss": 0.4068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4201893210411072,
|
|
"step": 1445,
|
|
"valid_targets_mean": 3619.3,
|
|
"valid_targets_min": 1486
|
|
},
|
|
{
|
|
"epoch": 2.3462783171521036,
|
|
"grad_norm": 0.6715979921248629,
|
|
"learning_rate": 3.364593355752857e-05,
|
|
"loss": 0.4141,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4077046513557434,
|
|
"step": 1450,
|
|
"valid_targets_mean": 3863.5,
|
|
"valid_targets_min": 1672
|
|
},
|
|
{
|
|
"epoch": 2.354368932038835,
|
|
"grad_norm": 0.7383136028350261,
|
|
"learning_rate": 3.358682594639379e-05,
|
|
"loss": 0.4009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43795761466026306,
|
|
"step": 1455,
|
|
"valid_targets_mean": 3301.6,
|
|
"valid_targets_min": 1346
|
|
},
|
|
{
|
|
"epoch": 2.3624595469255665,
|
|
"grad_norm": 0.7808846773518752,
|
|
"learning_rate": 3.3527497133605345e-05,
|
|
"loss": 0.4271,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4166765511035919,
|
|
"step": 1460,
|
|
"valid_targets_mean": 2826.4,
|
|
"valid_targets_min": 1269
|
|
},
|
|
{
|
|
"epoch": 2.3705501618122975,
|
|
"grad_norm": 1.8254727752794442,
|
|
"learning_rate": 3.346794808507182e-05,
|
|
"loss": 0.4049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3987411856651306,
|
|
"step": 1465,
|
|
"valid_targets_mean": 2513.6,
|
|
"valid_targets_min": 1334
|
|
},
|
|
{
|
|
"epoch": 2.378640776699029,
|
|
"grad_norm": 0.8176247713463801,
|
|
"learning_rate": 3.340817977028732e-05,
|
|
"loss": 0.4113,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40195712447166443,
|
|
"step": 1470,
|
|
"valid_targets_mean": 2750.2,
|
|
"valid_targets_min": 1299
|
|
},
|
|
{
|
|
"epoch": 2.3867313915857604,
|
|
"grad_norm": 0.9062003165524765,
|
|
"learning_rate": 3.3348193162315786e-05,
|
|
"loss": 0.4376,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4356459677219391,
|
|
"step": 1475,
|
|
"valid_targets_mean": 2549.2,
|
|
"valid_targets_min": 1338
|
|
},
|
|
{
|
|
"epoch": 2.394822006472492,
|
|
"grad_norm": 0.7055341758798552,
|
|
"learning_rate": 3.3287989237775064e-05,
|
|
"loss": 0.4208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43329355120658875,
|
|
"step": 1480,
|
|
"valid_targets_mean": 3387.4,
|
|
"valid_targets_min": 1218
|
|
},
|
|
{
|
|
"epoch": 2.4029126213592233,
|
|
"grad_norm": 0.7345506690309963,
|
|
"learning_rate": 3.322756897682106e-05,
|
|
"loss": 0.406,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4088819921016693,
|
|
"step": 1485,
|
|
"valid_targets_mean": 3456.2,
|
|
"valid_targets_min": 1527
|
|
},
|
|
{
|
|
"epoch": 2.411003236245955,
|
|
"grad_norm": 0.8017979886236237,
|
|
"learning_rate": 3.3166933363131765e-05,
|
|
"loss": 0.395,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39552873373031616,
|
|
"step": 1490,
|
|
"valid_targets_mean": 2982.2,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 2.4190938511326863,
|
|
"grad_norm": 0.937459026892843,
|
|
"learning_rate": 3.310608338389124e-05,
|
|
"loss": 0.4232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41104790568351746,
|
|
"step": 1495,
|
|
"valid_targets_mean": 3403.0,
|
|
"valid_targets_min": 1495
|
|
},
|
|
{
|
|
"epoch": 2.4271844660194173,
|
|
"grad_norm": 0.7438617223810716,
|
|
"learning_rate": 3.304502002977355e-05,
|
|
"loss": 0.4006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38526734709739685,
|
|
"step": 1500,
|
|
"valid_targets_mean": 3076.9,
|
|
"valid_targets_min": 1249
|
|
},
|
|
{
|
|
"epoch": 2.4352750809061487,
|
|
"grad_norm": 0.7446394304640043,
|
|
"learning_rate": 3.2983744294926614e-05,
|
|
"loss": 0.416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39251405000686646,
|
|
"step": 1505,
|
|
"valid_targets_mean": 3397.1,
|
|
"valid_targets_min": 1195
|
|
},
|
|
{
|
|
"epoch": 2.44336569579288,
|
|
"grad_norm": 0.7362091795056039,
|
|
"learning_rate": 3.292225717695606e-05,
|
|
"loss": 0.3898,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38442325592041016,
|
|
"step": 1510,
|
|
"valid_targets_mean": 2886.0,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 2.4514563106796117,
|
|
"grad_norm": 0.8169327602294557,
|
|
"learning_rate": 3.286055967690894e-05,
|
|
"loss": 0.3826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38595718145370483,
|
|
"step": 1515,
|
|
"valid_targets_mean": 2945.9,
|
|
"valid_targets_min": 1068
|
|
},
|
|
{
|
|
"epoch": 2.459546925566343,
|
|
"grad_norm": 0.7799505590543111,
|
|
"learning_rate": 3.279865279925748e-05,
|
|
"loss": 0.3822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3753122389316559,
|
|
"step": 1520,
|
|
"valid_targets_mean": 2965.0,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 2.4676375404530746,
|
|
"grad_norm": 0.5674370477801554,
|
|
"learning_rate": 3.273653755188265e-05,
|
|
"loss": 0.3715,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37651532888412476,
|
|
"step": 1525,
|
|
"valid_targets_mean": 4745.3,
|
|
"valid_targets_min": 688
|
|
},
|
|
{
|
|
"epoch": 2.475728155339806,
|
|
"grad_norm": 0.6247727477015123,
|
|
"learning_rate": 3.267421494605786e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3644024133682251,
|
|
"step": 1530,
|
|
"valid_targets_mean": 4116.0,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 2.483818770226537,
|
|
"grad_norm": 0.7491582122547379,
|
|
"learning_rate": 3.261168599643239e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35451722145080566,
|
|
"step": 1535,
|
|
"valid_targets_mean": 3082.2,
|
|
"valid_targets_min": 1538
|
|
},
|
|
{
|
|
"epoch": 2.4919093851132685,
|
|
"grad_norm": 0.6429281049648273,
|
|
"learning_rate": 3.254895172101495e-05,
|
|
"loss": 0.3482,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33318030834198,
|
|
"step": 1540,
|
|
"valid_targets_mean": 3706.6,
|
|
"valid_targets_min": 1503
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.6523483751167802,
|
|
"learning_rate": 3.248601314115709e-05,
|
|
"loss": 0.3875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3792145848274231,
|
|
"step": 1545,
|
|
"valid_targets_mean": 3630.9,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 2.5080906148867315,
|
|
"grad_norm": 0.7249836596601235,
|
|
"learning_rate": 3.2422871281536504e-05,
|
|
"loss": 0.3896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3936595022678375,
|
|
"step": 1550,
|
|
"valid_targets_mean": 3107.8,
|
|
"valid_targets_min": 1074
|
|
},
|
|
{
|
|
"epoch": 2.516181229773463,
|
|
"grad_norm": 0.8616243014972932,
|
|
"learning_rate": 3.235952717014046e-05,
|
|
"loss": 0.3545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3657923936843872,
|
|
"step": 1555,
|
|
"valid_targets_mean": 3956.6,
|
|
"valid_targets_min": 974
|
|
},
|
|
{
|
|
"epoch": 2.524271844660194,
|
|
"grad_norm": 0.638664828509639,
|
|
"learning_rate": 3.229598183824897e-05,
|
|
"loss": 0.358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37439674139022827,
|
|
"step": 1560,
|
|
"valid_targets_mean": 4259.0,
|
|
"valid_targets_min": 1555
|
|
},
|
|
{
|
|
"epoch": 2.5323624595469254,
|
|
"grad_norm": 0.6056668481066265,
|
|
"learning_rate": 3.223223632041806e-05,
|
|
"loss": 0.3682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3590593934059143,
|
|
"step": 1565,
|
|
"valid_targets_mean": 4229.0,
|
|
"valid_targets_min": 1335
|
|
},
|
|
{
|
|
"epoch": 2.540453074433657,
|
|
"grad_norm": 0.590522050754507,
|
|
"learning_rate": 3.216829165446288e-05,
|
|
"loss": 0.3579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3400675654411316,
|
|
"step": 1570,
|
|
"valid_targets_mean": 4780.1,
|
|
"valid_targets_min": 1276
|
|
},
|
|
{
|
|
"epoch": 2.5485436893203883,
|
|
"grad_norm": 0.6607067309911566,
|
|
"learning_rate": 3.210414888144085e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3499111831188202,
|
|
"step": 1575,
|
|
"valid_targets_mean": 4779.9,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 2.55663430420712,
|
|
"grad_norm": 0.702948411258421,
|
|
"learning_rate": 3.203980904563467e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3248726725578308,
|
|
"step": 1580,
|
|
"valid_targets_mean": 2915.0,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 2.5647249190938513,
|
|
"grad_norm": 0.6628115331326496,
|
|
"learning_rate": 3.1975273194535365e-05,
|
|
"loss": 0.3702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35476428270339966,
|
|
"step": 1585,
|
|
"valid_targets_mean": 3659.4,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 2.5728155339805827,
|
|
"grad_norm": 0.6615270179204286,
|
|
"learning_rate": 3.191054237882519e-05,
|
|
"loss": 0.371,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35199153423309326,
|
|
"step": 1590,
|
|
"valid_targets_mean": 4081.2,
|
|
"valid_targets_min": 1116
|
|
},
|
|
{
|
|
"epoch": 2.5809061488673137,
|
|
"grad_norm": 0.6626160538531557,
|
|
"learning_rate": 3.1845617652360556e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38913512229919434,
|
|
"step": 1595,
|
|
"valid_targets_mean": 4127.9,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 2.588996763754045,
|
|
"grad_norm": 0.5686585923437388,
|
|
"learning_rate": 3.178050007215483e-05,
|
|
"loss": 0.3356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3361608386039734,
|
|
"step": 1600,
|
|
"valid_targets_mean": 4937.8,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 2.5970873786407767,
|
|
"grad_norm": 2.2272207542575115,
|
|
"learning_rate": 3.171519069836118e-05,
|
|
"loss": 0.3504,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3982278108596802,
|
|
"step": 1605,
|
|
"valid_targets_mean": 2787.6,
|
|
"valid_targets_min": 1029
|
|
},
|
|
{
|
|
"epoch": 2.605177993527508,
|
|
"grad_norm": 0.6695229671580348,
|
|
"learning_rate": 3.1649690594255265e-05,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34326860308647156,
|
|
"step": 1610,
|
|
"valid_targets_mean": 3278.9,
|
|
"valid_targets_min": 1251
|
|
},
|
|
{
|
|
"epoch": 2.6132686084142396,
|
|
"grad_norm": 0.661728761043615,
|
|
"learning_rate": 3.1584000826217974e-05,
|
|
"loss": 0.358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3541804850101471,
|
|
"step": 1615,
|
|
"valid_targets_mean": 4680.7,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 2.6213592233009706,
|
|
"grad_norm": 0.8113177984942129,
|
|
"learning_rate": 3.151812246371802e-05,
|
|
"loss": 0.3673,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3555559515953064,
|
|
"step": 1620,
|
|
"valid_targets_mean": 4892.4,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 2.6294498381877025,
|
|
"grad_norm": 0.576765401036298,
|
|
"learning_rate": 3.145205657929454e-05,
|
|
"loss": 0.3651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34818094968795776,
|
|
"step": 1625,
|
|
"valid_targets_mean": 4213.6,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 2.6375404530744335,
|
|
"grad_norm": 0.6921256308734074,
|
|
"learning_rate": 3.1385804248539665e-05,
|
|
"loss": 0.3621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34675049781799316,
|
|
"step": 1630,
|
|
"valid_targets_mean": 3643.2,
|
|
"valid_targets_min": 774
|
|
},
|
|
{
|
|
"epoch": 2.645631067961165,
|
|
"grad_norm": 0.5734997693188809,
|
|
"learning_rate": 3.131936655008097e-05,
|
|
"loss": 0.3329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3277677595615387,
|
|
"step": 1635,
|
|
"valid_targets_mean": 3834.2,
|
|
"valid_targets_min": 799
|
|
},
|
|
{
|
|
"epoch": 2.6537216828478964,
|
|
"grad_norm": 0.6176988613698919,
|
|
"learning_rate": 3.125274456556392e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33639439940452576,
|
|
"step": 1640,
|
|
"valid_targets_mean": 4379.6,
|
|
"valid_targets_min": 1377
|
|
},
|
|
{
|
|
"epoch": 2.661812297734628,
|
|
"grad_norm": 0.6544172101213054,
|
|
"learning_rate": 3.1185939379634274e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35664668679237366,
|
|
"step": 1645,
|
|
"valid_targets_mean": 3656.1,
|
|
"valid_targets_min": 1206
|
|
},
|
|
{
|
|
"epoch": 2.6699029126213594,
|
|
"grad_norm": 0.5477059231493232,
|
|
"learning_rate": 3.111895207992042e-05,
|
|
"loss": 0.3293,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30970466136932373,
|
|
"step": 1650,
|
|
"valid_targets_mean": 4599.8,
|
|
"valid_targets_min": 1474
|
|
},
|
|
{
|
|
"epoch": 2.6779935275080904,
|
|
"grad_norm": 0.7029760332920263,
|
|
"learning_rate": 3.1051783757015686e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3542120158672333,
|
|
"step": 1655,
|
|
"valid_targets_mean": 3595.6,
|
|
"valid_targets_min": 1424
|
|
},
|
|
{
|
|
"epoch": 2.686084142394822,
|
|
"grad_norm": 0.6498836452249998,
|
|
"learning_rate": 3.098443550446051e-05,
|
|
"loss": 0.3479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3418267071247101,
|
|
"step": 1660,
|
|
"valid_targets_mean": 3510.6,
|
|
"valid_targets_min": 1446
|
|
},
|
|
{
|
|
"epoch": 2.6941747572815533,
|
|
"grad_norm": 1.2211153218964443,
|
|
"learning_rate": 3.0916908418724765e-05,
|
|
"loss": 0.3491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3544435203075409,
|
|
"step": 1665,
|
|
"valid_targets_mean": 4430.4,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 2.7022653721682848,
|
|
"grad_norm": 1.2099387065339355,
|
|
"learning_rate": 3.0849203599189776e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3719382882118225,
|
|
"step": 1670,
|
|
"valid_targets_mean": 3609.2,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 2.7103559870550162,
|
|
"grad_norm": 0.6608903398848406,
|
|
"learning_rate": 3.0781322148130514e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30829671025276184,
|
|
"step": 1675,
|
|
"valid_targets_mean": 3646.9,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 2.7184466019417477,
|
|
"grad_norm": 0.6424461992050344,
|
|
"learning_rate": 3.071326517069761e-05,
|
|
"loss": 0.3307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3524508774280548,
|
|
"step": 1680,
|
|
"valid_targets_mean": 3864.7,
|
|
"valid_targets_min": 1537
|
|
},
|
|
{
|
|
"epoch": 2.726537216828479,
|
|
"grad_norm": 0.6088892936229023,
|
|
"learning_rate": 3.064503377489936e-05,
|
|
"loss": 0.3472,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35562074184417725,
|
|
"step": 1685,
|
|
"valid_targets_mean": 4292.1,
|
|
"valid_targets_min": 1626
|
|
},
|
|
{
|
|
"epoch": 2.73462783171521,
|
|
"grad_norm": 0.8310379687378989,
|
|
"learning_rate": 3.0576629071583704e-05,
|
|
"loss": 0.3516,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3787177801132202,
|
|
"step": 1690,
|
|
"valid_targets_mean": 2578.6,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 2.7427184466019416,
|
|
"grad_norm": 0.7336795250162784,
|
|
"learning_rate": 3.0508052174420132e-05,
|
|
"loss": 0.3358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34328681230545044,
|
|
"step": 1695,
|
|
"valid_targets_mean": 3745.4,
|
|
"valid_targets_min": 1339
|
|
},
|
|
{
|
|
"epoch": 2.750809061488673,
|
|
"grad_norm": 0.5761292250835377,
|
|
"learning_rate": 3.0439304199881543e-05,
|
|
"loss": 0.3144,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2521141767501831,
|
|
"step": 1700,
|
|
"valid_targets_mean": 6403.2,
|
|
"valid_targets_min": 1724
|
|
},
|
|
{
|
|
"epoch": 2.7588996763754046,
|
|
"grad_norm": 0.47192752380407144,
|
|
"learning_rate": 3.03703862672261e-05,
|
|
"loss": 0.24,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2625490427017212,
|
|
"step": 1705,
|
|
"valid_targets_mean": 6242.4,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 2.766990291262136,
|
|
"grad_norm": 0.4692838219469033,
|
|
"learning_rate": 3.030129949847895e-05,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24692867696285248,
|
|
"step": 1710,
|
|
"valid_targets_mean": 5536.4,
|
|
"valid_targets_min": 3078
|
|
},
|
|
{
|
|
"epoch": 2.775080906148867,
|
|
"grad_norm": 0.4695104746189952,
|
|
"learning_rate": 3.023204501841403e-05,
|
|
"loss": 0.2292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22257234156131744,
|
|
"step": 1715,
|
|
"valid_targets_mean": 5897.4,
|
|
"valid_targets_min": 2340
|
|
},
|
|
{
|
|
"epoch": 2.783171521035599,
|
|
"grad_norm": 0.46675952644311963,
|
|
"learning_rate": 3.01626239545357e-05,
|
|
"loss": 0.2377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24739572405815125,
|
|
"step": 1720,
|
|
"valid_targets_mean": 6239.1,
|
|
"valid_targets_min": 2926
|
|
},
|
|
{
|
|
"epoch": 2.79126213592233,
|
|
"grad_norm": 0.46028576559446716,
|
|
"learning_rate": 3.00930374370604e-05,
|
|
"loss": 0.2022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19058983027935028,
|
|
"step": 1725,
|
|
"valid_targets_mean": 6132.8,
|
|
"valid_targets_min": 3165
|
|
},
|
|
{
|
|
"epoch": 2.7993527508090614,
|
|
"grad_norm": 0.4338650036638946,
|
|
"learning_rate": 3.002328659889826e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19038927555084229,
|
|
"step": 1730,
|
|
"valid_targets_mean": 5869.2,
|
|
"valid_targets_min": 2200
|
|
},
|
|
{
|
|
"epoch": 2.807443365695793,
|
|
"grad_norm": 0.4095273507413556,
|
|
"learning_rate": 2.995337257563466e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19616976380348206,
|
|
"step": 1735,
|
|
"valid_targets_mean": 6250.8,
|
|
"valid_targets_min": 552
|
|
},
|
|
{
|
|
"epoch": 2.8155339805825244,
|
|
"grad_norm": 0.4397121045961578,
|
|
"learning_rate": 2.9883296505511704e-05,
|
|
"loss": 0.2235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.254894882440567,
|
|
"step": 1740,
|
|
"valid_targets_mean": 6930.5,
|
|
"valid_targets_min": 4391
|
|
},
|
|
{
|
|
"epoch": 2.823624595469256,
|
|
"grad_norm": 0.46668357421287665,
|
|
"learning_rate": 2.981305952940973e-05,
|
|
"loss": 0.2179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20420300960540771,
|
|
"step": 1745,
|
|
"valid_targets_mean": 5333.6,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 2.831715210355987,
|
|
"grad_norm": 0.46764197228804283,
|
|
"learning_rate": 2.9742662790828732e-05,
|
|
"loss": 0.2216,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21579869091510773,
|
|
"step": 1750,
|
|
"valid_targets_mean": 6891.6,
|
|
"valid_targets_min": 4008
|
|
},
|
|
{
|
|
"epoch": 2.8398058252427183,
|
|
"grad_norm": 0.48354599335481646,
|
|
"learning_rate": 2.9672107435869727e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21376736462116241,
|
|
"step": 1755,
|
|
"valid_targets_mean": 5651.2,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 2.8478964401294498,
|
|
"grad_norm": 0.5475714799134836,
|
|
"learning_rate": 2.9601394613216086e-05,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2783951759338379,
|
|
"step": 1760,
|
|
"valid_targets_mean": 5771.6,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 2.855987055016181,
|
|
"grad_norm": 0.48845769089035257,
|
|
"learning_rate": 2.953052547411487e-05,
|
|
"loss": 0.2261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19538253545761108,
|
|
"step": 1765,
|
|
"valid_targets_mean": 5659.5,
|
|
"valid_targets_min": 3276
|
|
},
|
|
{
|
|
"epoch": 2.8640776699029127,
|
|
"grad_norm": 0.4351545792992896,
|
|
"learning_rate": 2.9459501172358046e-05,
|
|
"loss": 0.2332,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24069242179393768,
|
|
"step": 1770,
|
|
"valid_targets_mean": 6256.8,
|
|
"valid_targets_min": 4223
|
|
},
|
|
{
|
|
"epoch": 2.872168284789644,
|
|
"grad_norm": 0.444626387049783,
|
|
"learning_rate": 2.9388322864263747e-05,
|
|
"loss": 0.2347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18499356508255005,
|
|
"step": 1775,
|
|
"valid_targets_mean": 5226.5,
|
|
"valid_targets_min": 900
|
|
},
|
|
{
|
|
"epoch": 2.8802588996763756,
|
|
"grad_norm": 0.49978310609608434,
|
|
"learning_rate": 2.9316991708657382e-05,
|
|
"loss": 0.2182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24685606360435486,
|
|
"step": 1780,
|
|
"valid_targets_mean": 6908.9,
|
|
"valid_targets_min": 2907
|
|
},
|
|
{
|
|
"epoch": 2.8883495145631066,
|
|
"grad_norm": 0.45780929753555405,
|
|
"learning_rate": 2.924550886685285e-05,
|
|
"loss": 0.2298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22891566157341003,
|
|
"step": 1785,
|
|
"valid_targets_mean": 5650.2,
|
|
"valid_targets_min": 3626
|
|
},
|
|
{
|
|
"epoch": 2.896440129449838,
|
|
"grad_norm": 0.3931202774620866,
|
|
"learning_rate": 2.917387550263357e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18989673256874084,
|
|
"step": 1790,
|
|
"valid_targets_mean": 6715.2,
|
|
"valid_targets_min": 2968
|
|
},
|
|
{
|
|
"epoch": 2.9045307443365695,
|
|
"grad_norm": 0.45281393221699767,
|
|
"learning_rate": 2.9102092782233563e-05,
|
|
"loss": 0.2215,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21978428959846497,
|
|
"step": 1795,
|
|
"valid_targets_mean": 5944.2,
|
|
"valid_targets_min": 3462
|
|
},
|
|
{
|
|
"epoch": 2.912621359223301,
|
|
"grad_norm": 0.45339382448201765,
|
|
"learning_rate": 2.9030161874318455e-05,
|
|
"loss": 0.2291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2767179608345032,
|
|
"step": 1800,
|
|
"valid_targets_mean": 7132.8,
|
|
"valid_targets_min": 3017
|
|
},
|
|
{
|
|
"epoch": 2.9207119741100325,
|
|
"grad_norm": 0.5075746973420298,
|
|
"learning_rate": 2.895808394996644e-05,
|
|
"loss": 0.2012,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2105654776096344,
|
|
"step": 1805,
|
|
"valid_targets_mean": 5465.3,
|
|
"valid_targets_min": 3731
|
|
},
|
|
{
|
|
"epoch": 2.9288025889967635,
|
|
"grad_norm": 0.4074364881732189,
|
|
"learning_rate": 2.8885860182649263e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18284833431243896,
|
|
"step": 1810,
|
|
"valid_targets_mean": 6481.9,
|
|
"valid_targets_min": 1048
|
|
},
|
|
{
|
|
"epoch": 2.9368932038834954,
|
|
"grad_norm": 0.5177652651451281,
|
|
"learning_rate": 2.881349174821305e-05,
|
|
"loss": 0.2304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18447479605674744,
|
|
"step": 1815,
|
|
"valid_targets_mean": 5724.3,
|
|
"valid_targets_min": 3426
|
|
},
|
|
{
|
|
"epoch": 2.9449838187702264,
|
|
"grad_norm": 0.47895059311957167,
|
|
"learning_rate": 2.8740979824859194e-05,
|
|
"loss": 0.2046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20376095175743103,
|
|
"step": 1820,
|
|
"valid_targets_mean": 5310.1,
|
|
"valid_targets_min": 1463
|
|
},
|
|
{
|
|
"epoch": 2.953074433656958,
|
|
"grad_norm": 0.49423974656062747,
|
|
"learning_rate": 2.8668325593125192e-05,
|
|
"loss": 0.2175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23879647254943848,
|
|
"step": 1825,
|
|
"valid_targets_mean": 5651.5,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 2.9611650485436893,
|
|
"grad_norm": 0.44390854244120626,
|
|
"learning_rate": 2.8595530235865397e-05,
|
|
"loss": 0.2085,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21764904260635376,
|
|
"step": 1830,
|
|
"valid_targets_mean": 6370.6,
|
|
"valid_targets_min": 3513
|
|
},
|
|
{
|
|
"epoch": 2.969255663430421,
|
|
"grad_norm": 0.6114626506498663,
|
|
"learning_rate": 2.8522594938231755e-05,
|
|
"loss": 0.2412,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261320561170578,
|
|
"step": 1835,
|
|
"valid_targets_mean": 7567.2,
|
|
"valid_targets_min": 3890
|
|
},
|
|
{
|
|
"epoch": 2.9773462783171523,
|
|
"grad_norm": 0.467812383161166,
|
|
"learning_rate": 2.8449520887654558e-05,
|
|
"loss": 0.228,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2207464873790741,
|
|
"step": 1840,
|
|
"valid_targets_mean": 6544.5,
|
|
"valid_targets_min": 3523
|
|
},
|
|
{
|
|
"epoch": 2.9854368932038833,
|
|
"grad_norm": 0.4791034057933847,
|
|
"learning_rate": 2.8376309273823047e-05,
|
|
"loss": 0.2196,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18718640506267548,
|
|
"step": 1845,
|
|
"valid_targets_mean": 5593.0,
|
|
"valid_targets_min": 1614
|
|
},
|
|
{
|
|
"epoch": 2.9935275080906147,
|
|
"grad_norm": 0.4117756326042873,
|
|
"learning_rate": 2.8302961288666094e-05,
|
|
"loss": 0.2248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20447809994220734,
|
|
"step": 1850,
|
|
"valid_targets_mean": 6863.6,
|
|
"valid_targets_min": 3648
|
|
},
|
|
{
|
|
"epoch": 3.001618122977346,
|
|
"grad_norm": 0.47583421251030267,
|
|
"learning_rate": 2.8229478126332766e-05,
|
|
"loss": 0.2224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24508079886436462,
|
|
"step": 1855,
|
|
"valid_targets_mean": 6507.2,
|
|
"valid_targets_min": 1969
|
|
},
|
|
{
|
|
"epoch": 3.0097087378640777,
|
|
"grad_norm": 0.45872688387507193,
|
|
"learning_rate": 2.815586098317291e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2711004614830017,
|
|
"step": 1860,
|
|
"valid_targets_mean": 6547.6,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 3.017799352750809,
|
|
"grad_norm": 0.47429826815585996,
|
|
"learning_rate": 2.8082111057717655e-05,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2886805534362793,
|
|
"step": 1865,
|
|
"valid_targets_mean": 7456.8,
|
|
"valid_targets_min": 1676
|
|
},
|
|
{
|
|
"epoch": 3.0258899676375406,
|
|
"grad_norm": 0.5015651427456486,
|
|
"learning_rate": 2.8008229550659883e-05,
|
|
"loss": 0.3135,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3145654797554016,
|
|
"step": 1870,
|
|
"valid_targets_mean": 7212.2,
|
|
"valid_targets_min": 3800
|
|
},
|
|
{
|
|
"epoch": 3.033980582524272,
|
|
"grad_norm": 0.47188782403478186,
|
|
"learning_rate": 2.793421766483474e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24573259055614471,
|
|
"step": 1875,
|
|
"valid_targets_mean": 7030.2,
|
|
"valid_targets_min": 3738
|
|
},
|
|
{
|
|
"epoch": 3.042071197411003,
|
|
"grad_norm": 0.43763390517046236,
|
|
"learning_rate": 2.7860076605199995e-05,
|
|
"loss": 0.291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2639431357383728,
|
|
"step": 1880,
|
|
"valid_targets_mean": 6541.8,
|
|
"valid_targets_min": 469
|
|
},
|
|
{
|
|
"epoch": 3.0501618122977345,
|
|
"grad_norm": 0.44172809849782685,
|
|
"learning_rate": 2.7785807578816448e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29740840196609497,
|
|
"step": 1885,
|
|
"valid_targets_mean": 7401.4,
|
|
"valid_targets_min": 2854
|
|
},
|
|
{
|
|
"epoch": 3.058252427184466,
|
|
"grad_norm": 0.429012811299429,
|
|
"learning_rate": 2.7711411794828274e-05,
|
|
"loss": 0.3087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3050397038459778,
|
|
"step": 1890,
|
|
"valid_targets_mean": 8668.2,
|
|
"valid_targets_min": 3365
|
|
},
|
|
{
|
|
"epoch": 3.0663430420711975,
|
|
"grad_norm": 0.4488913116405302,
|
|
"learning_rate": 2.7636890464443333e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24772655963897705,
|
|
"step": 1895,
|
|
"valid_targets_mean": 6754.3,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 3.074433656957929,
|
|
"grad_norm": 0.44216597098526006,
|
|
"learning_rate": 2.756224480091347e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3219381868839264,
|
|
"step": 1900,
|
|
"valid_targets_mean": 8038.2,
|
|
"valid_targets_min": 3704
|
|
},
|
|
{
|
|
"epoch": 3.0825242718446604,
|
|
"grad_norm": 0.49388425110906853,
|
|
"learning_rate": 2.7487476019514726e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3122866749763489,
|
|
"step": 1905,
|
|
"valid_targets_mean": 6892.1,
|
|
"valid_targets_min": 1736
|
|
},
|
|
{
|
|
"epoch": 3.0906148867313914,
|
|
"grad_norm": 0.45296171291930487,
|
|
"learning_rate": 2.74125853375276e-05,
|
|
"loss": 0.3001,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2828711271286011,
|
|
"step": 1910,
|
|
"valid_targets_mean": 7149.5,
|
|
"valid_targets_min": 3606
|
|
},
|
|
{
|
|
"epoch": 3.098705501618123,
|
|
"grad_norm": 0.5691072351705835,
|
|
"learning_rate": 2.7337573974217177e-05,
|
|
"loss": 0.32,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833087742328644,
|
|
"step": 1915,
|
|
"valid_targets_mean": 6004.5,
|
|
"valid_targets_min": 306
|
|
},
|
|
{
|
|
"epoch": 3.1067961165048543,
|
|
"grad_norm": 0.4162881780463123,
|
|
"learning_rate": 2.726244315081334e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25544559955596924,
|
|
"step": 1920,
|
|
"valid_targets_mean": 7372.0,
|
|
"valid_targets_min": 4593
|
|
},
|
|
{
|
|
"epoch": 3.114886731391586,
|
|
"grad_norm": 0.43692064078458287,
|
|
"learning_rate": 2.718719409049082e-05,
|
|
"loss": 0.2995,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34150227904319763,
|
|
"step": 1925,
|
|
"valid_targets_mean": 7420.4,
|
|
"valid_targets_min": 3842
|
|
},
|
|
{
|
|
"epoch": 3.1229773462783172,
|
|
"grad_norm": 0.501069700833799,
|
|
"learning_rate": 2.711182801834933e-05,
|
|
"loss": 0.3217,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35399535298347473,
|
|
"step": 1930,
|
|
"valid_targets_mean": 8089.4,
|
|
"valid_targets_min": 3071
|
|
},
|
|
{
|
|
"epoch": 3.1310679611650487,
|
|
"grad_norm": 0.5547118933791086,
|
|
"learning_rate": 2.7036346161393617e-05,
|
|
"loss": 0.3046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902340888977051,
|
|
"step": 1935,
|
|
"valid_targets_mean": 6877.0,
|
|
"valid_targets_min": 3654
|
|
},
|
|
{
|
|
"epoch": 3.1391585760517797,
|
|
"grad_norm": 0.44034139813676154,
|
|
"learning_rate": 2.696074974851346e-05,
|
|
"loss": 0.2844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29094940423965454,
|
|
"step": 1940,
|
|
"valid_targets_mean": 8145.8,
|
|
"valid_targets_min": 2130
|
|
},
|
|
{
|
|
"epoch": 3.147249190938511,
|
|
"grad_norm": 0.5016943552371228,
|
|
"learning_rate": 2.688504001046367e-05,
|
|
"loss": 0.2867,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3496394157409668,
|
|
"step": 1945,
|
|
"valid_targets_mean": 7568.6,
|
|
"valid_targets_min": 3435
|
|
},
|
|
{
|
|
"epoch": 3.1553398058252426,
|
|
"grad_norm": 0.48325348045886846,
|
|
"learning_rate": 2.6809218179844085e-05,
|
|
"loss": 0.3013,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26768216490745544,
|
|
"step": 1950,
|
|
"valid_targets_mean": 6097.7,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 3.163430420711974,
|
|
"grad_norm": 0.487336193105687,
|
|
"learning_rate": 2.6733285491079453e-05,
|
|
"loss": 0.2994,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33538806438446045,
|
|
"step": 1955,
|
|
"valid_targets_mean": 6649.2,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 3.1715210355987056,
|
|
"grad_norm": 0.4703762210934678,
|
|
"learning_rate": 2.6657243180399373e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2592100501060486,
|
|
"step": 1960,
|
|
"valid_targets_mean": 6283.1,
|
|
"valid_targets_min": 2278
|
|
},
|
|
{
|
|
"epoch": 3.179611650485437,
|
|
"grad_norm": 0.5068944167952791,
|
|
"learning_rate": 2.658109248581814e-05,
|
|
"loss": 0.3136,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30327221751213074,
|
|
"step": 1965,
|
|
"valid_targets_mean": 7856.1,
|
|
"valid_targets_min": 3609
|
|
},
|
|
{
|
|
"epoch": 3.1877022653721685,
|
|
"grad_norm": 0.47468151640087014,
|
|
"learning_rate": 2.650483464711462e-05,
|
|
"loss": 0.2941,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3432081341743469,
|
|
"step": 1970,
|
|
"valid_targets_mean": 8008.8,
|
|
"valid_targets_min": 1165
|
|
},
|
|
{
|
|
"epoch": 3.1957928802588995,
|
|
"grad_norm": 0.5084724878869032,
|
|
"learning_rate": 2.6428470905812047e-05,
|
|
"loss": 0.2661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23990876972675323,
|
|
"step": 1975,
|
|
"valid_targets_mean": 7456.1,
|
|
"valid_targets_min": 2778
|
|
},
|
|
{
|
|
"epoch": 3.203883495145631,
|
|
"grad_norm": 0.6172383982749374,
|
|
"learning_rate": 2.6352002505157802e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3006366491317749,
|
|
"step": 1980,
|
|
"valid_targets_mean": 4243.0,
|
|
"valid_targets_min": 1153
|
|
},
|
|
{
|
|
"epoch": 3.2119741100323624,
|
|
"grad_norm": 0.7978962996020116,
|
|
"learning_rate": 2.6275430690103188e-05,
|
|
"loss": 0.4157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.43584519624710083,
|
|
"step": 1985,
|
|
"valid_targets_mean": 2861.2,
|
|
"valid_targets_min": 1460
|
|
},
|
|
{
|
|
"epoch": 3.220064724919094,
|
|
"grad_norm": 0.9313628986194699,
|
|
"learning_rate": 2.6198756707283153e-05,
|
|
"loss": 0.4145,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4482871890068054,
|
|
"step": 1990,
|
|
"valid_targets_mean": 2436.8,
|
|
"valid_targets_min": 1108
|
|
},
|
|
{
|
|
"epoch": 3.2281553398058254,
|
|
"grad_norm": 0.7634666137781282,
|
|
"learning_rate": 2.612198180499601e-05,
|
|
"loss": 0.3616,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3643973171710968,
|
|
"step": 1995,
|
|
"valid_targets_mean": 3268.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.236245954692557,
|
|
"grad_norm": 0.8517352332533736,
|
|
"learning_rate": 2.604510723318309e-05,
|
|
"loss": 0.4081,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4433375895023346,
|
|
"step": 2000,
|
|
"valid_targets_mean": 2590.0,
|
|
"valid_targets_min": 872
|
|
},
|
|
{
|
|
"epoch": 3.244336569579288,
|
|
"grad_norm": 0.7252301434029291,
|
|
"learning_rate": 2.5968134243408405e-05,
|
|
"loss": 0.3972,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39637094736099243,
|
|
"step": 2005,
|
|
"valid_targets_mean": 3088.1,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 3.2524271844660193,
|
|
"grad_norm": 0.8139379244117145,
|
|
"learning_rate": 2.5891064088838278e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3841778039932251,
|
|
"step": 2010,
|
|
"valid_targets_mean": 2681.4,
|
|
"valid_targets_min": 1213
|
|
},
|
|
{
|
|
"epoch": 3.2605177993527508,
|
|
"grad_norm": 0.7212261838585554,
|
|
"learning_rate": 2.5813898024220912e-05,
|
|
"loss": 0.3925,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3293357491493225,
|
|
"step": 2015,
|
|
"valid_targets_mean": 3165.6,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 3.2686084142394822,
|
|
"grad_norm": 0.7493586150342039,
|
|
"learning_rate": 2.573663730586601e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40346598625183105,
|
|
"step": 2020,
|
|
"valid_targets_mean": 3995.6,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 3.2766990291262137,
|
|
"grad_norm": 0.8133579178643173,
|
|
"learning_rate": 2.5659283191624277e-05,
|
|
"loss": 0.377,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.4063666760921478,
|
|
"step": 2025,
|
|
"valid_targets_mean": 3736.4,
|
|
"valid_targets_min": 1311
|
|
},
|
|
{
|
|
"epoch": 3.284789644012945,
|
|
"grad_norm": 0.9016208263184244,
|
|
"learning_rate": 2.5581836940866967e-05,
|
|
"loss": 0.4024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41699790954589844,
|
|
"step": 2030,
|
|
"valid_targets_mean": 3050.8,
|
|
"valid_targets_min": 1126
|
|
},
|
|
{
|
|
"epoch": 3.292880258899676,
|
|
"grad_norm": 0.7467519616638045,
|
|
"learning_rate": 2.550429981446537e-05,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37695205211639404,
|
|
"step": 2035,
|
|
"valid_targets_mean": 2968.2,
|
|
"valid_targets_min": 1254
|
|
},
|
|
{
|
|
"epoch": 3.3009708737864076,
|
|
"grad_norm": 0.679997759441785,
|
|
"learning_rate": 2.542667307477027e-05,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3674064576625824,
|
|
"step": 2040,
|
|
"valid_targets_mean": 3811.7,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 3.309061488673139,
|
|
"grad_norm": 0.7019193542215713,
|
|
"learning_rate": 2.534895798559144e-05,
|
|
"loss": 0.3669,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3509365916252136,
|
|
"step": 2045,
|
|
"valid_targets_mean": 4138.7,
|
|
"valid_targets_min": 1290
|
|
},
|
|
{
|
|
"epoch": 3.3171521035598706,
|
|
"grad_norm": 0.7644981967082519,
|
|
"learning_rate": 2.527115581217702e-05,
|
|
"loss": 0.3554,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36790961027145386,
|
|
"step": 2050,
|
|
"valid_targets_mean": 3372.7,
|
|
"valid_targets_min": 1693
|
|
},
|
|
{
|
|
"epoch": 3.325242718446602,
|
|
"grad_norm": 0.8324542130119277,
|
|
"learning_rate": 2.5193267821192914e-05,
|
|
"loss": 0.3739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3999173045158386,
|
|
"step": 2055,
|
|
"valid_targets_mean": 2886.9,
|
|
"valid_targets_min": 1027
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.7251311605581526,
|
|
"learning_rate": 2.5115295280702222e-05,
|
|
"loss": 0.3637,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35481077432632446,
|
|
"step": 2060,
|
|
"valid_targets_mean": 3630.1,
|
|
"valid_targets_min": 1032
|
|
},
|
|
{
|
|
"epoch": 3.341423948220065,
|
|
"grad_norm": 0.8994421585493918,
|
|
"learning_rate": 2.5037239460144534e-05,
|
|
"loss": 0.3703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.348206490278244,
|
|
"step": 2065,
|
|
"valid_targets_mean": 2775.9,
|
|
"valid_targets_min": 1173
|
|
},
|
|
{
|
|
"epoch": 3.349514563106796,
|
|
"grad_norm": 0.796492511881728,
|
|
"learning_rate": 2.4959101630315315e-05,
|
|
"loss": 0.3693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.362110435962677,
|
|
"step": 2070,
|
|
"valid_targets_mean": 3605.9,
|
|
"valid_targets_min": 1897
|
|
},
|
|
{
|
|
"epoch": 3.3576051779935274,
|
|
"grad_norm": 1.3034529810821374,
|
|
"learning_rate": 2.4880883063345157e-05,
|
|
"loss": 0.3767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.366311252117157,
|
|
"step": 2075,
|
|
"valid_targets_mean": 2585.5,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 3.365695792880259,
|
|
"grad_norm": 0.8660088681232742,
|
|
"learning_rate": 2.480258503267912e-05,
|
|
"loss": 0.3744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39507314562797546,
|
|
"step": 2080,
|
|
"valid_targets_mean": 2860.4,
|
|
"valid_targets_min": 1009
|
|
},
|
|
{
|
|
"epoch": 3.3737864077669903,
|
|
"grad_norm": 0.6972722103618133,
|
|
"learning_rate": 2.472420881305599e-05,
|
|
"loss": 0.3603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3619586229324341,
|
|
"step": 2085,
|
|
"valid_targets_mean": 3527.3,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 3.381877022653722,
|
|
"grad_norm": 0.7218193777112525,
|
|
"learning_rate": 2.4645755680487497e-05,
|
|
"loss": 0.3873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3706960082054138,
|
|
"step": 2090,
|
|
"valid_targets_mean": 3451.2,
|
|
"valid_targets_min": 1500
|
|
},
|
|
{
|
|
"epoch": 3.389967637540453,
|
|
"grad_norm": 0.8084142195670189,
|
|
"learning_rate": 2.4567226912237566e-05,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35703185200691223,
|
|
"step": 2095,
|
|
"valid_targets_mean": 2787.2,
|
|
"valid_targets_min": 1151
|
|
},
|
|
{
|
|
"epoch": 3.3980582524271843,
|
|
"grad_norm": 0.6964508405963089,
|
|
"learning_rate": 2.4488623786801523e-05,
|
|
"loss": 0.3759,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3755350112915039,
|
|
"step": 2100,
|
|
"valid_targets_mean": 4092.3,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 3.4061488673139158,
|
|
"grad_norm": 0.8107522604064265,
|
|
"learning_rate": 2.4409947583885272e-05,
|
|
"loss": 0.3584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3875834345817566,
|
|
"step": 2105,
|
|
"valid_targets_mean": 2659.3,
|
|
"valid_targets_min": 1435
|
|
},
|
|
{
|
|
"epoch": 3.414239482200647,
|
|
"grad_norm": 0.7797309552660957,
|
|
"learning_rate": 2.4331199584384448e-05,
|
|
"loss": 0.37,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41315728425979614,
|
|
"step": 2110,
|
|
"valid_targets_mean": 3654.5,
|
|
"valid_targets_min": 2036
|
|
},
|
|
{
|
|
"epoch": 3.4223300970873787,
|
|
"grad_norm": 0.7938288617184067,
|
|
"learning_rate": 2.425238107036359e-05,
|
|
"loss": 0.3757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3548850119113922,
|
|
"step": 2115,
|
|
"valid_targets_mean": 2510.2,
|
|
"valid_targets_min": 1118
|
|
},
|
|
{
|
|
"epoch": 3.43042071197411,
|
|
"grad_norm": 0.9298235923393635,
|
|
"learning_rate": 2.4173493325035255e-05,
|
|
"loss": 0.3677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37875327467918396,
|
|
"step": 2120,
|
|
"valid_targets_mean": 2486.3,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 3.4385113268608416,
|
|
"grad_norm": 0.793729423432582,
|
|
"learning_rate": 2.4094537632739126e-05,
|
|
"loss": 0.3659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35467851161956787,
|
|
"step": 2125,
|
|
"valid_targets_mean": 3492.1,
|
|
"valid_targets_min": 1260
|
|
},
|
|
{
|
|
"epoch": 3.4466019417475726,
|
|
"grad_norm": 0.7849821970563073,
|
|
"learning_rate": 2.40155152789211e-05,
|
|
"loss": 0.3563,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34691619873046875,
|
|
"step": 2130,
|
|
"valid_targets_mean": 2875.5,
|
|
"valid_targets_min": 1062
|
|
},
|
|
{
|
|
"epoch": 3.454692556634304,
|
|
"grad_norm": 0.7693298480722005,
|
|
"learning_rate": 2.3936427550112375e-05,
|
|
"loss": 0.3574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40094462037086487,
|
|
"step": 2135,
|
|
"valid_targets_mean": 3563.5,
|
|
"valid_targets_min": 1491
|
|
},
|
|
{
|
|
"epoch": 3.4627831715210355,
|
|
"grad_norm": 0.6373126854145776,
|
|
"learning_rate": 2.3857275733908476e-05,
|
|
"loss": 0.3352,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33306121826171875,
|
|
"step": 2140,
|
|
"valid_targets_mean": 4153.3,
|
|
"valid_targets_min": 1248
|
|
},
|
|
{
|
|
"epoch": 3.470873786407767,
|
|
"grad_norm": 0.7673184920625664,
|
|
"learning_rate": 2.377806111894832e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.40561556816101074,
|
|
"step": 2145,
|
|
"valid_targets_mean": 3383.9,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 3.4789644012944985,
|
|
"grad_norm": 0.6215117682240084,
|
|
"learning_rate": 2.3698784994893214e-05,
|
|
"loss": 0.338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3328400254249573,
|
|
"step": 2150,
|
|
"valid_targets_mean": 4350.6,
|
|
"valid_targets_min": 1351
|
|
},
|
|
{
|
|
"epoch": 3.48705501618123,
|
|
"grad_norm": 0.77504169798237,
|
|
"learning_rate": 2.3619448652405885e-05,
|
|
"loss": 0.3318,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32707396149635315,
|
|
"step": 2155,
|
|
"valid_targets_mean": 4064.1,
|
|
"valid_targets_min": 1670
|
|
},
|
|
{
|
|
"epoch": 3.4951456310679614,
|
|
"grad_norm": 0.5917729762982872,
|
|
"learning_rate": 2.3540053383129458e-05,
|
|
"loss": 0.3229,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.313139945268631,
|
|
"step": 2160,
|
|
"valid_targets_mean": 4482.6,
|
|
"valid_targets_min": 1115
|
|
},
|
|
{
|
|
"epoch": 3.5032362459546924,
|
|
"grad_norm": 0.7182867250889213,
|
|
"learning_rate": 2.346060047966638e-05,
|
|
"loss": 0.3691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3666308522224426,
|
|
"step": 2165,
|
|
"valid_targets_mean": 3469.6,
|
|
"valid_targets_min": 1529
|
|
},
|
|
{
|
|
"epoch": 3.511326860841424,
|
|
"grad_norm": 0.6390375106609838,
|
|
"learning_rate": 2.3381091235557475e-05,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3377194404602051,
|
|
"step": 2170,
|
|
"valid_targets_mean": 3768.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 3.5194174757281553,
|
|
"grad_norm": 0.654587158888464,
|
|
"learning_rate": 2.330152694526077e-05,
|
|
"loss": 0.3184,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33807533979415894,
|
|
"step": 2175,
|
|
"valid_targets_mean": 4148.2,
|
|
"valid_targets_min": 1399
|
|
},
|
|
{
|
|
"epoch": 3.527508090614887,
|
|
"grad_norm": 0.6152407643263405,
|
|
"learning_rate": 2.322190890413053e-05,
|
|
"loss": 0.3224,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30578580498695374,
|
|
"step": 2180,
|
|
"valid_targets_mean": 4575.1,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 3.5355987055016183,
|
|
"grad_norm": 0.6712841496569436,
|
|
"learning_rate": 2.3142238408396076e-05,
|
|
"loss": 0.3475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3307744860649109,
|
|
"step": 2185,
|
|
"valid_targets_mean": 4259.9,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 3.5436893203883493,
|
|
"grad_norm": 0.672919162530352,
|
|
"learning_rate": 2.3062516755140733e-05,
|
|
"loss": 0.3256,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.339915931224823,
|
|
"step": 2190,
|
|
"valid_targets_mean": 4558.8,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 3.551779935275081,
|
|
"grad_norm": 0.7516148005411548,
|
|
"learning_rate": 2.2982745242280716e-05,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34565508365631104,
|
|
"step": 2195,
|
|
"valid_targets_mean": 4438.0,
|
|
"valid_targets_min": 1266
|
|
},
|
|
{
|
|
"epoch": 3.559870550161812,
|
|
"grad_norm": 0.6032399842230928,
|
|
"learning_rate": 2.290292516854396e-05,
|
|
"loss": 0.3194,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31961023807525635,
|
|
"step": 2200,
|
|
"valid_targets_mean": 4544.9,
|
|
"valid_targets_min": 1267
|
|
},
|
|
{
|
|
"epoch": 3.5679611650485437,
|
|
"grad_norm": 0.6327368151188703,
|
|
"learning_rate": 2.2823057833449013e-05,
|
|
"loss": 0.336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3518397808074951,
|
|
"step": 2205,
|
|
"valid_targets_mean": 4132.3,
|
|
"valid_targets_min": 1142
|
|
},
|
|
{
|
|
"epoch": 3.576051779935275,
|
|
"grad_norm": 0.5907129322549025,
|
|
"learning_rate": 2.274314453728386e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2956004738807678,
|
|
"step": 2210,
|
|
"valid_targets_mean": 4665.3,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 3.5841423948220066,
|
|
"grad_norm": 0.5773586353010518,
|
|
"learning_rate": 2.2663186581084777e-05,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30565541982650757,
|
|
"step": 2215,
|
|
"valid_targets_mean": 5279.4,
|
|
"valid_targets_min": 1157
|
|
},
|
|
{
|
|
"epoch": 3.592233009708738,
|
|
"grad_norm": 0.6978125591124367,
|
|
"learning_rate": 2.258318526661511e-05,
|
|
"loss": 0.3,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29132378101348877,
|
|
"step": 2220,
|
|
"valid_targets_mean": 3529.3,
|
|
"valid_targets_min": 1433
|
|
},
|
|
{
|
|
"epoch": 3.600323624595469,
|
|
"grad_norm": 0.7250064098063926,
|
|
"learning_rate": 2.250314189634412e-05,
|
|
"loss": 0.3619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35525816679000854,
|
|
"step": 2225,
|
|
"valid_targets_mean": 3208.6,
|
|
"valid_targets_min": 1345
|
|
},
|
|
{
|
|
"epoch": 3.6084142394822005,
|
|
"grad_norm": 0.6810472579293779,
|
|
"learning_rate": 2.2423057773425745e-05,
|
|
"loss": 0.3143,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32046782970428467,
|
|
"step": 2230,
|
|
"valid_targets_mean": 3731.8,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 3.616504854368932,
|
|
"grad_norm": 0.6576908902184688,
|
|
"learning_rate": 2.2342934201677422e-05,
|
|
"loss": 0.339,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33715614676475525,
|
|
"step": 2235,
|
|
"valid_targets_mean": 4375.0,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 3.6245954692556634,
|
|
"grad_norm": 0.8395915796514543,
|
|
"learning_rate": 2.2262772485558814e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3146824240684509,
|
|
"step": 2240,
|
|
"valid_targets_mean": 2428.5,
|
|
"valid_targets_min": 895
|
|
},
|
|
{
|
|
"epoch": 3.632686084142395,
|
|
"grad_norm": 0.7431826617479385,
|
|
"learning_rate": 2.218257393015062e-05,
|
|
"loss": 0.3391,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33532094955444336,
|
|
"step": 2245,
|
|
"valid_targets_mean": 3131.6,
|
|
"valid_targets_min": 1451
|
|
},
|
|
{
|
|
"epoch": 3.6407766990291264,
|
|
"grad_norm": 0.6622130333948848,
|
|
"learning_rate": 2.2102339841133285e-05,
|
|
"loss": 0.3193,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246265649795532,
|
|
"step": 2250,
|
|
"valid_targets_mean": 4755.8,
|
|
"valid_targets_min": 1169
|
|
},
|
|
{
|
|
"epoch": 3.648867313915858,
|
|
"grad_norm": 0.6260074949359321,
|
|
"learning_rate": 2.2022071524765786e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29530560970306396,
|
|
"step": 2255,
|
|
"valid_targets_mean": 4335.2,
|
|
"valid_targets_min": 1644
|
|
},
|
|
{
|
|
"epoch": 3.656957928802589,
|
|
"grad_norm": 0.6440630664498894,
|
|
"learning_rate": 2.1941770287864315e-05,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28719133138656616,
|
|
"step": 2260,
|
|
"valid_targets_mean": 3863.4,
|
|
"valid_targets_min": 1861
|
|
},
|
|
{
|
|
"epoch": 3.6650485436893203,
|
|
"grad_norm": 0.6126886249442058,
|
|
"learning_rate": 2.1861437437781045e-05,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3384862244129181,
|
|
"step": 2265,
|
|
"valid_targets_mean": 4486.1,
|
|
"valid_targets_min": 1413
|
|
},
|
|
{
|
|
"epoch": 3.6731391585760518,
|
|
"grad_norm": 0.7024210805695811,
|
|
"learning_rate": 2.1781074282382834e-05,
|
|
"loss": 0.3034,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33213749527931213,
|
|
"step": 2270,
|
|
"valid_targets_mean": 3520.7,
|
|
"valid_targets_min": 1401
|
|
},
|
|
{
|
|
"epoch": 3.6812297734627832,
|
|
"grad_norm": 0.7445912146264361,
|
|
"learning_rate": 2.170068213002992e-05,
|
|
"loss": 0.3059,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32845768332481384,
|
|
"step": 2275,
|
|
"valid_targets_mean": 3274.1,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 3.6893203883495147,
|
|
"grad_norm": 0.7396134273791413,
|
|
"learning_rate": 2.1620262289554628e-05,
|
|
"loss": 0.327,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3479768633842468,
|
|
"step": 2280,
|
|
"valid_targets_mean": 3706.2,
|
|
"valid_targets_min": 1061
|
|
},
|
|
{
|
|
"epoch": 3.6974110032362457,
|
|
"grad_norm": 0.6430183101362507,
|
|
"learning_rate": 2.1539816070240062e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28230059146881104,
|
|
"step": 2285,
|
|
"valid_targets_mean": 3550.2,
|
|
"valid_targets_min": 1327
|
|
},
|
|
{
|
|
"epoch": 3.705501618122977,
|
|
"grad_norm": 0.6548295376788589,
|
|
"learning_rate": 2.1459344781798807e-05,
|
|
"loss": 0.3331,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32259976863861084,
|
|
"step": 2290,
|
|
"valid_targets_mean": 4379.5,
|
|
"valid_targets_min": 1476
|
|
},
|
|
{
|
|
"epoch": 3.7135922330097086,
|
|
"grad_norm": 0.5791411549074513,
|
|
"learning_rate": 2.1378849734351565e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30870115756988525,
|
|
"step": 2295,
|
|
"valid_targets_mean": 5011.4,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.72168284789644,
|
|
"grad_norm": 0.6686774018016203,
|
|
"learning_rate": 2.1298332238405856e-05,
|
|
"loss": 0.2997,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3138067126274109,
|
|
"step": 2300,
|
|
"valid_targets_mean": 3748.7,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 3.7297734627831716,
|
|
"grad_norm": 0.6176250019902619,
|
|
"learning_rate": 2.1217793604834687e-05,
|
|
"loss": 0.3208,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28627514839172363,
|
|
"step": 2305,
|
|
"valid_targets_mean": 4047.9,
|
|
"valid_targets_min": 1854
|
|
},
|
|
{
|
|
"epoch": 3.737864077669903,
|
|
"grad_norm": 0.6392998919122899,
|
|
"learning_rate": 2.1137235144855177e-05,
|
|
"loss": 0.3205,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30675268173217773,
|
|
"step": 2310,
|
|
"valid_targets_mean": 3985.9,
|
|
"valid_targets_min": 1189
|
|
},
|
|
{
|
|
"epoch": 3.7459546925566345,
|
|
"grad_norm": 0.6810561058243152,
|
|
"learning_rate": 2.1056658170007247e-05,
|
|
"loss": 0.3127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32047516107559204,
|
|
"step": 2315,
|
|
"valid_targets_mean": 4031.6,
|
|
"valid_targets_min": 1708
|
|
},
|
|
{
|
|
"epoch": 3.7540453074433655,
|
|
"grad_norm": 0.5224360936076557,
|
|
"learning_rate": 2.0976063992132252e-05,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21849115192890167,
|
|
"step": 2320,
|
|
"valid_targets_mean": 6091.9,
|
|
"valid_targets_min": 1642
|
|
},
|
|
{
|
|
"epoch": 3.762135922330097,
|
|
"grad_norm": 0.5087393518020041,
|
|
"learning_rate": 2.0895453923351613e-05,
|
|
"loss": 0.2107,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1869930475950241,
|
|
"step": 2325,
|
|
"valid_targets_mean": 5424.1,
|
|
"valid_targets_min": 4057
|
|
},
|
|
{
|
|
"epoch": 3.7702265372168284,
|
|
"grad_norm": 0.4499642484406925,
|
|
"learning_rate": 2.0814829276045465e-05,
|
|
"loss": 0.2086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2276923507452011,
|
|
"step": 2330,
|
|
"valid_targets_mean": 5988.4,
|
|
"valid_targets_min": 1506
|
|
},
|
|
{
|
|
"epoch": 3.77831715210356,
|
|
"grad_norm": 0.49784474395857836,
|
|
"learning_rate": 2.0734191362831294e-05,
|
|
"loss": 0.2003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22134488821029663,
|
|
"step": 2335,
|
|
"valid_targets_mean": 5366.7,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 3.7864077669902914,
|
|
"grad_norm": 0.4512208729363229,
|
|
"learning_rate": 2.0653541496542566e-05,
|
|
"loss": 0.2173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16249793767929077,
|
|
"step": 2340,
|
|
"valid_targets_mean": 6226.6,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 3.794498381877023,
|
|
"grad_norm": 0.5065208653158666,
|
|
"learning_rate": 2.0572880990207343e-05,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22169040143489838,
|
|
"step": 2345,
|
|
"valid_targets_mean": 5491.7,
|
|
"valid_targets_min": 2967
|
|
},
|
|
{
|
|
"epoch": 3.8025889967637543,
|
|
"grad_norm": 0.7330612173972851,
|
|
"learning_rate": 2.049221115702692e-05,
|
|
"loss": 0.1967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19703437387943268,
|
|
"step": 2350,
|
|
"valid_targets_mean": 5216.8,
|
|
"valid_targets_min": 716
|
|
},
|
|
{
|
|
"epoch": 3.8106796116504853,
|
|
"grad_norm": 0.5442930834831672,
|
|
"learning_rate": 2.0411533310354433e-05,
|
|
"loss": 0.1947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23777084052562714,
|
|
"step": 2355,
|
|
"valid_targets_mean": 5939.6,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 3.8187702265372168,
|
|
"grad_norm": 0.5018825408101142,
|
|
"learning_rate": 2.0330848763673484e-05,
|
|
"loss": 0.2097,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22420501708984375,
|
|
"step": 2360,
|
|
"valid_targets_mean": 5939.4,
|
|
"valid_targets_min": 2599
|
|
},
|
|
{
|
|
"epoch": 3.8268608414239482,
|
|
"grad_norm": 0.4970494413717442,
|
|
"learning_rate": 2.0250158830576764e-05,
|
|
"loss": 0.1857,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20009323954582214,
|
|
"step": 2365,
|
|
"valid_targets_mean": 6258.1,
|
|
"valid_targets_min": 1956
|
|
},
|
|
{
|
|
"epoch": 3.8349514563106797,
|
|
"grad_norm": 0.44923834669987023,
|
|
"learning_rate": 2.0169464824744636e-05,
|
|
"loss": 0.1981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1759965568780899,
|
|
"step": 2370,
|
|
"valid_targets_mean": 6348.2,
|
|
"valid_targets_min": 2993
|
|
},
|
|
{
|
|
"epoch": 3.843042071197411,
|
|
"grad_norm": 0.4495132904363796,
|
|
"learning_rate": 2.0088768059923795e-05,
|
|
"loss": 0.1933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19953864812850952,
|
|
"step": 2375,
|
|
"valid_targets_mean": 6097.6,
|
|
"valid_targets_min": 1803
|
|
},
|
|
{
|
|
"epoch": 3.851132686084142,
|
|
"grad_norm": 0.5096333824884599,
|
|
"learning_rate": 2.0008069849905847e-05,
|
|
"loss": 0.22,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24274849891662598,
|
|
"step": 2380,
|
|
"valid_targets_mean": 6063.0,
|
|
"valid_targets_min": 620
|
|
},
|
|
{
|
|
"epoch": 3.8592233009708736,
|
|
"grad_norm": 0.4745564296696438,
|
|
"learning_rate": 1.9927371508505915e-05,
|
|
"loss": 0.1962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21321144700050354,
|
|
"step": 2385,
|
|
"valid_targets_mean": 5397.2,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 3.867313915857605,
|
|
"grad_norm": 0.4561833046514106,
|
|
"learning_rate": 1.984667434954126e-05,
|
|
"loss": 0.2364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23293474316596985,
|
|
"step": 2390,
|
|
"valid_targets_mean": 6302.4,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 3.8754045307443366,
|
|
"grad_norm": 0.48692062588305735,
|
|
"learning_rate": 1.9765979686809912e-05,
|
|
"loss": 0.1988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20418483018875122,
|
|
"step": 2395,
|
|
"valid_targets_mean": 5835.7,
|
|
"valid_targets_min": 1488
|
|
},
|
|
{
|
|
"epoch": 3.883495145631068,
|
|
"grad_norm": 0.5760245409491545,
|
|
"learning_rate": 1.968528883406925e-05,
|
|
"loss": 0.1985,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20110785961151123,
|
|
"step": 2400,
|
|
"valid_targets_mean": 6698.3,
|
|
"valid_targets_min": 1041
|
|
},
|
|
{
|
|
"epoch": 3.8915857605177995,
|
|
"grad_norm": 0.455942195772971,
|
|
"learning_rate": 1.9604603105014616e-05,
|
|
"loss": 0.2206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19040292501449585,
|
|
"step": 2405,
|
|
"valid_targets_mean": 6100.8,
|
|
"valid_targets_min": 3304
|
|
},
|
|
{
|
|
"epoch": 3.899676375404531,
|
|
"grad_norm": 0.43188977809199325,
|
|
"learning_rate": 1.9523923813257957e-05,
|
|
"loss": 0.227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18241921067237854,
|
|
"step": 2410,
|
|
"valid_targets_mean": 7247.2,
|
|
"valid_targets_min": 3122
|
|
},
|
|
{
|
|
"epoch": 3.907766990291262,
|
|
"grad_norm": 0.438052906217528,
|
|
"learning_rate": 1.9443252272306384e-05,
|
|
"loss": 0.1975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2029222548007965,
|
|
"step": 2415,
|
|
"valid_targets_mean": 6403.1,
|
|
"valid_targets_min": 3898
|
|
},
|
|
{
|
|
"epoch": 3.9158576051779934,
|
|
"grad_norm": 0.47333386859383014,
|
|
"learning_rate": 1.9362589795540852e-05,
|
|
"loss": 0.2015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1605544537305832,
|
|
"step": 2420,
|
|
"valid_targets_mean": 5129.8,
|
|
"valid_targets_min": 1379
|
|
},
|
|
{
|
|
"epoch": 3.923948220064725,
|
|
"grad_norm": 0.5199177948375372,
|
|
"learning_rate": 1.9281937696194723e-05,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19830086827278137,
|
|
"step": 2425,
|
|
"valid_targets_mean": 5376.5,
|
|
"valid_targets_min": 2363
|
|
},
|
|
{
|
|
"epoch": 3.9320388349514563,
|
|
"grad_norm": 0.5206001127949775,
|
|
"learning_rate": 1.9201297287332428e-05,
|
|
"loss": 0.1963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1985754817724228,
|
|
"step": 2430,
|
|
"valid_targets_mean": 5625.6,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 3.940129449838188,
|
|
"grad_norm": 0.5033961651148904,
|
|
"learning_rate": 1.912066988182806e-05,
|
|
"loss": 0.2091,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20823442935943604,
|
|
"step": 2435,
|
|
"valid_targets_mean": 6466.5,
|
|
"valid_targets_min": 3494
|
|
},
|
|
{
|
|
"epoch": 3.948220064724919,
|
|
"grad_norm": 0.4571578459678172,
|
|
"learning_rate": 1.9040056792344005e-05,
|
|
"loss": 0.1795,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19116318225860596,
|
|
"step": 2440,
|
|
"valid_targets_mean": 6313.9,
|
|
"valid_targets_min": 1775
|
|
},
|
|
{
|
|
"epoch": 3.9563106796116507,
|
|
"grad_norm": 0.44341434436450294,
|
|
"learning_rate": 1.8959459331309576e-05,
|
|
"loss": 0.2044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19575943052768707,
|
|
"step": 2445,
|
|
"valid_targets_mean": 6352.2,
|
|
"valid_targets_min": 1426
|
|
},
|
|
{
|
|
"epoch": 3.9644012944983817,
|
|
"grad_norm": 0.45862690264733,
|
|
"learning_rate": 1.8878878810899653e-05,
|
|
"loss": 0.1959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17178326845169067,
|
|
"step": 2450,
|
|
"valid_targets_mean": 6052.2,
|
|
"valid_targets_min": 3261
|
|
},
|
|
{
|
|
"epoch": 3.972491909385113,
|
|
"grad_norm": 0.47784816019033394,
|
|
"learning_rate": 1.8798316543013317e-05,
|
|
"loss": 0.2259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21690775454044342,
|
|
"step": 2455,
|
|
"valid_targets_mean": 5542.8,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 3.9805825242718447,
|
|
"grad_norm": 0.4393081044956994,
|
|
"learning_rate": 1.8717773839252477e-05,
|
|
"loss": 0.1967,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17442364990711212,
|
|
"step": 2460,
|
|
"valid_targets_mean": 5560.8,
|
|
"valid_targets_min": 1301
|
|
},
|
|
{
|
|
"epoch": 3.988673139158576,
|
|
"grad_norm": 0.4967733193720244,
|
|
"learning_rate": 1.8637252010900515e-05,
|
|
"loss": 0.2095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19287577271461487,
|
|
"step": 2465,
|
|
"valid_targets_mean": 5077.6,
|
|
"valid_targets_min": 422
|
|
},
|
|
{
|
|
"epoch": 3.9967637540453076,
|
|
"grad_norm": 0.5150380483343356,
|
|
"learning_rate": 1.8556752368900972e-05,
|
|
"loss": 0.2011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16168953478336334,
|
|
"step": 2470,
|
|
"valid_targets_mean": 5078.6,
|
|
"valid_targets_min": 310
|
|
},
|
|
{
|
|
"epoch": 4.004854368932039,
|
|
"grad_norm": 0.5006842825532989,
|
|
"learning_rate": 1.8476276223836177e-05,
|
|
"loss": 0.2593,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28505608439445496,
|
|
"step": 2475,
|
|
"valid_targets_mean": 7461.8,
|
|
"valid_targets_min": 4222
|
|
},
|
|
{
|
|
"epoch": 4.0129449838187705,
|
|
"grad_norm": 0.4449390233467316,
|
|
"learning_rate": 1.8395824885905898e-05,
|
|
"loss": 0.2862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080679178237915,
|
|
"step": 2480,
|
|
"valid_targets_mean": 8813.2,
|
|
"valid_targets_min": 3435
|
|
},
|
|
{
|
|
"epoch": 4.0210355987055015,
|
|
"grad_norm": 0.45517804304428394,
|
|
"learning_rate": 1.8315399664906062e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30526304244995117,
|
|
"step": 2485,
|
|
"valid_targets_mean": 8051.2,
|
|
"valid_targets_min": 2932
|
|
},
|
|
{
|
|
"epoch": 4.029126213592233,
|
|
"grad_norm": 0.46941839918358313,
|
|
"learning_rate": 1.823500187020735e-05,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32779282331466675,
|
|
"step": 2490,
|
|
"valid_targets_mean": 8165.1,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 4.0372168284789645,
|
|
"grad_norm": 0.4965357687908804,
|
|
"learning_rate": 1.815463281073396e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27793848514556885,
|
|
"step": 2495,
|
|
"valid_targets_mean": 7405.6,
|
|
"valid_targets_min": 3938
|
|
},
|
|
{
|
|
"epoch": 4.0453074433656955,
|
|
"grad_norm": 0.5034223412827382,
|
|
"learning_rate": 1.8074293794942262e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26466065645217896,
|
|
"step": 2500,
|
|
"valid_targets_mean": 6687.1,
|
|
"valid_targets_min": 2974
|
|
},
|
|
{
|
|
"epoch": 4.053398058252427,
|
|
"grad_norm": 0.440367832467616,
|
|
"learning_rate": 1.7993986130799477e-05,
|
|
"loss": 0.3048,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32282423973083496,
|
|
"step": 2505,
|
|
"valid_targets_mean": 7811.5,
|
|
"valid_targets_min": 3018
|
|
},
|
|
{
|
|
"epoch": 4.061488673139158,
|
|
"grad_norm": 0.48021732325557415,
|
|
"learning_rate": 1.7913711125762435e-05,
|
|
"loss": 0.2872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25816529989242554,
|
|
"step": 2510,
|
|
"valid_targets_mean": 6452.9,
|
|
"valid_targets_min": 956
|
|
},
|
|
{
|
|
"epoch": 4.06957928802589,
|
|
"grad_norm": 0.4512446325214269,
|
|
"learning_rate": 1.7833470086756214e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2777278423309326,
|
|
"step": 2515,
|
|
"valid_targets_mean": 7293.6,
|
|
"valid_targets_min": 4247
|
|
},
|
|
{
|
|
"epoch": 4.077669902912621,
|
|
"grad_norm": 0.42845468052968455,
|
|
"learning_rate": 1.7753264320152934e-05,
|
|
"loss": 0.2651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2563123106956482,
|
|
"step": 2520,
|
|
"valid_targets_mean": 7259.2,
|
|
"valid_targets_min": 2191
|
|
},
|
|
{
|
|
"epoch": 4.085760517799352,
|
|
"grad_norm": 0.4894422981849186,
|
|
"learning_rate": 1.7673095131750454e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2566876709461212,
|
|
"step": 2525,
|
|
"valid_targets_mean": 6200.4,
|
|
"valid_targets_min": 2703
|
|
},
|
|
{
|
|
"epoch": 4.093851132686084,
|
|
"grad_norm": 0.5167026880546517,
|
|
"learning_rate": 1.759296382675112e-05,
|
|
"loss": 0.3038,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32553917169570923,
|
|
"step": 2530,
|
|
"valid_targets_mean": 6045.4,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 4.101941747572815,
|
|
"grad_norm": 0.456225396026587,
|
|
"learning_rate": 1.7512871709740515e-05,
|
|
"loss": 0.2879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3135607838630676,
|
|
"step": 2535,
|
|
"valid_targets_mean": 8484.8,
|
|
"valid_targets_min": 4321
|
|
},
|
|
{
|
|
"epoch": 4.110032362459547,
|
|
"grad_norm": 0.4359728834305482,
|
|
"learning_rate": 1.743282008466619e-05,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.257236123085022,
|
|
"step": 2540,
|
|
"valid_targets_mean": 6967.7,
|
|
"valid_targets_min": 3742
|
|
},
|
|
{
|
|
"epoch": 4.118122977346278,
|
|
"grad_norm": 0.47833998100504577,
|
|
"learning_rate": 1.7352810254816498e-05,
|
|
"loss": 0.2912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34937071800231934,
|
|
"step": 2545,
|
|
"valid_targets_mean": 8730.6,
|
|
"valid_targets_min": 4829
|
|
},
|
|
{
|
|
"epoch": 4.12621359223301,
|
|
"grad_norm": 0.4507590764600118,
|
|
"learning_rate": 1.727284352279934e-05,
|
|
"loss": 0.3133,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.338039368391037,
|
|
"step": 2550,
|
|
"valid_targets_mean": 8258.9,
|
|
"valid_targets_min": 2392
|
|
},
|
|
{
|
|
"epoch": 4.134304207119741,
|
|
"grad_norm": 0.46815652183190226,
|
|
"learning_rate": 1.7192921190520936e-05,
|
|
"loss": 0.2719,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2833985984325409,
|
|
"step": 2555,
|
|
"valid_targets_mean": 7541.9,
|
|
"valid_targets_min": 4054
|
|
},
|
|
{
|
|
"epoch": 4.142394822006472,
|
|
"grad_norm": 0.7041539993056743,
|
|
"learning_rate": 1.7113044559164657e-05,
|
|
"loss": 0.26,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2381104826927185,
|
|
"step": 2560,
|
|
"valid_targets_mean": 6805.5,
|
|
"valid_targets_min": 3012
|
|
},
|
|
{
|
|
"epoch": 4.150485436893204,
|
|
"grad_norm": 0.45520173715987966,
|
|
"learning_rate": 1.7033214929169847e-05,
|
|
"loss": 0.2864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3208598494529724,
|
|
"step": 2565,
|
|
"valid_targets_mean": 8233.7,
|
|
"valid_targets_min": 3694
|
|
},
|
|
{
|
|
"epoch": 4.158576051779935,
|
|
"grad_norm": 0.4763941474946576,
|
|
"learning_rate": 1.695343360021064e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28221839666366577,
|
|
"step": 2570,
|
|
"valid_targets_mean": 6891.4,
|
|
"valid_targets_min": 3734
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.4372845011040278,
|
|
"learning_rate": 1.6873701871174782e-05,
|
|
"loss": 0.2888,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.316440224647522,
|
|
"step": 2575,
|
|
"valid_targets_mean": 8462.8,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 4.174757281553398,
|
|
"grad_norm": 0.4319984625589556,
|
|
"learning_rate": 1.6794021040142534e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2788882851600647,
|
|
"step": 2580,
|
|
"valid_targets_mean": 7721.5,
|
|
"valid_targets_min": 4003
|
|
},
|
|
{
|
|
"epoch": 4.18284789644013,
|
|
"grad_norm": 0.4459508263295891,
|
|
"learning_rate": 1.6714392404365467e-05,
|
|
"loss": 0.2874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29237473011016846,
|
|
"step": 2585,
|
|
"valid_targets_mean": 7546.4,
|
|
"valid_targets_min": 3816
|
|
},
|
|
{
|
|
"epoch": 4.190938511326861,
|
|
"grad_norm": 0.44578985610791283,
|
|
"learning_rate": 1.6634817260245417e-05,
|
|
"loss": 0.2739,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26097139716148376,
|
|
"step": 2590,
|
|
"valid_targets_mean": 7171.8,
|
|
"valid_targets_min": 2549
|
|
},
|
|
{
|
|
"epoch": 4.199029126213592,
|
|
"grad_norm": 0.4837956443790072,
|
|
"learning_rate": 1.655529690331332e-05,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902294993400574,
|
|
"step": 2595,
|
|
"valid_targets_mean": 6778.6,
|
|
"valid_targets_min": 2080
|
|
},
|
|
{
|
|
"epoch": 4.207119741100324,
|
|
"grad_norm": 0.8167988548284764,
|
|
"learning_rate": 1.6475832628208165e-05,
|
|
"loss": 0.3336,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42150378227233887,
|
|
"step": 2600,
|
|
"valid_targets_mean": 3177.8,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 4.215210355987055,
|
|
"grad_norm": 0.7615957557370234,
|
|
"learning_rate": 1.6396425728655874e-05,
|
|
"loss": 0.3736,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.37615785002708435,
|
|
"step": 2605,
|
|
"valid_targets_mean": 3481.9,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 4.223300970873787,
|
|
"grad_norm": 0.7887356809021314,
|
|
"learning_rate": 1.6317077497448278e-05,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32747870683670044,
|
|
"step": 2610,
|
|
"valid_targets_mean": 2773.0,
|
|
"valid_targets_min": 1253
|
|
},
|
|
{
|
|
"epoch": 4.231391585760518,
|
|
"grad_norm": 0.7388958962034481,
|
|
"learning_rate": 1.6237789226422033e-05,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3640366196632385,
|
|
"step": 2615,
|
|
"valid_targets_mean": 3517.6,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 4.239482200647249,
|
|
"grad_norm": 0.8211350839413274,
|
|
"learning_rate": 1.6158562206437634e-05,
|
|
"loss": 0.3689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3327547311782837,
|
|
"step": 2620,
|
|
"valid_targets_mean": 2927.6,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 4.247572815533981,
|
|
"grad_norm": 0.8017947866736652,
|
|
"learning_rate": 1.6079397727358345e-05,
|
|
"loss": 0.3635,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33964037895202637,
|
|
"step": 2625,
|
|
"valid_targets_mean": 3221.8,
|
|
"valid_targets_min": 1246
|
|
},
|
|
{
|
|
"epoch": 4.255663430420712,
|
|
"grad_norm": 0.8231664391013209,
|
|
"learning_rate": 1.6000297078029256e-05,
|
|
"loss": 0.3763,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.41385623812675476,
|
|
"step": 2630,
|
|
"valid_targets_mean": 3345.7,
|
|
"valid_targets_min": 1123
|
|
},
|
|
{
|
|
"epoch": 4.263754045307444,
|
|
"grad_norm": 0.807482211440655,
|
|
"learning_rate": 1.5921261546256236e-05,
|
|
"loss": 0.3429,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3225884735584259,
|
|
"step": 2635,
|
|
"valid_targets_mean": 2662.1,
|
|
"valid_targets_min": 1310
|
|
},
|
|
{
|
|
"epoch": 4.271844660194175,
|
|
"grad_norm": 0.7308639134760625,
|
|
"learning_rate": 1.5842292418785026e-05,
|
|
"loss": 0.341,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3007241189479828,
|
|
"step": 2640,
|
|
"valid_targets_mean": 3257.6,
|
|
"valid_targets_min": 1319
|
|
},
|
|
{
|
|
"epoch": 4.2799352750809065,
|
|
"grad_norm": 0.805650608661951,
|
|
"learning_rate": 1.5763390981280258e-05,
|
|
"loss": 0.3678,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39171522855758667,
|
|
"step": 2645,
|
|
"valid_targets_mean": 3564.8,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 4.288025889967638,
|
|
"grad_norm": 0.8261461823177421,
|
|
"learning_rate": 1.568455851830453e-05,
|
|
"loss": 0.3474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.318564772605896,
|
|
"step": 2650,
|
|
"valid_targets_mean": 2672.8,
|
|
"valid_targets_min": 1292
|
|
},
|
|
{
|
|
"epoch": 4.296116504854369,
|
|
"grad_norm": 0.8080317661606545,
|
|
"learning_rate": 1.5605796313297502e-05,
|
|
"loss": 0.3602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.36015939712524414,
|
|
"step": 2655,
|
|
"valid_targets_mean": 3064.9,
|
|
"valid_targets_min": 977
|
|
},
|
|
{
|
|
"epoch": 4.3042071197411005,
|
|
"grad_norm": 0.6955171780036041,
|
|
"learning_rate": 1.552710564855498e-05,
|
|
"loss": 0.3383,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3366152048110962,
|
|
"step": 2660,
|
|
"valid_targets_mean": 3880.6,
|
|
"valid_targets_min": 877
|
|
},
|
|
{
|
|
"epoch": 4.3122977346278315,
|
|
"grad_norm": 0.7976014610219154,
|
|
"learning_rate": 1.5448487805208047e-05,
|
|
"loss": 0.334,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3383542001247406,
|
|
"step": 2665,
|
|
"valid_targets_mean": 3467.2,
|
|
"valid_targets_min": 1475
|
|
},
|
|
{
|
|
"epoch": 4.320388349514563,
|
|
"grad_norm": 0.7704921245778071,
|
|
"learning_rate": 1.5369944063202228e-05,
|
|
"loss": 0.3292,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3546125292778015,
|
|
"step": 2670,
|
|
"valid_targets_mean": 3955.9,
|
|
"valid_targets_min": 1137
|
|
},
|
|
{
|
|
"epoch": 4.328478964401294,
|
|
"grad_norm": 0.8636778847639369,
|
|
"learning_rate": 1.529147570127663e-05,
|
|
"loss": 0.3414,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35152947902679443,
|
|
"step": 2675,
|
|
"valid_targets_mean": 3870.5,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 4.336569579288026,
|
|
"grad_norm": 0.781729212171773,
|
|
"learning_rate": 1.5213083996943124e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3037518858909607,
|
|
"step": 2680,
|
|
"valid_targets_mean": 3128.0,
|
|
"valid_targets_min": 1222
|
|
},
|
|
{
|
|
"epoch": 4.344660194174757,
|
|
"grad_norm": 0.8221148280461172,
|
|
"learning_rate": 1.5134770226465533e-05,
|
|
"loss": 0.3461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34873682260513306,
|
|
"step": 2685,
|
|
"valid_targets_mean": 3277.3,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 4.352750809061488,
|
|
"grad_norm": 0.9288812715286362,
|
|
"learning_rate": 1.5056535664838894e-05,
|
|
"loss": 0.3252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3221864700317383,
|
|
"step": 2690,
|
|
"valid_targets_mean": 3244.1,
|
|
"valid_targets_min": 1388
|
|
},
|
|
{
|
|
"epoch": 4.36084142394822,
|
|
"grad_norm": 0.9070208170441065,
|
|
"learning_rate": 1.4978381585768676e-05,
|
|
"loss": 0.3611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3749698996543884,
|
|
"step": 2695,
|
|
"valid_targets_mean": 2561.9,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 4.368932038834951,
|
|
"grad_norm": 1.0518568013946572,
|
|
"learning_rate": 1.490030926165002e-05,
|
|
"loss": 0.3364,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3401104807853699,
|
|
"step": 2700,
|
|
"valid_targets_mean": 1831.4,
|
|
"valid_targets_min": 1013
|
|
},
|
|
{
|
|
"epoch": 4.377022653721683,
|
|
"grad_norm": 0.8470545820516009,
|
|
"learning_rate": 1.4822319963547083e-05,
|
|
"loss": 0.3453,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34943297505378723,
|
|
"step": 2705,
|
|
"valid_targets_mean": 2959.6,
|
|
"valid_targets_min": 1689
|
|
},
|
|
{
|
|
"epoch": 4.385113268608414,
|
|
"grad_norm": 0.8641001873331948,
|
|
"learning_rate": 1.4744414961172267e-05,
|
|
"loss": 0.3528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3659989833831787,
|
|
"step": 2710,
|
|
"valid_targets_mean": 3059.9,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 4.393203883495145,
|
|
"grad_norm": 0.88611420485447,
|
|
"learning_rate": 1.4666595522865628e-05,
|
|
"loss": 0.3404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34444698691368103,
|
|
"step": 2715,
|
|
"valid_targets_mean": 2292.4,
|
|
"valid_targets_min": 1128
|
|
},
|
|
{
|
|
"epoch": 4.401294498381877,
|
|
"grad_norm": 1.3843610065295007,
|
|
"learning_rate": 1.4588862915574158e-05,
|
|
"loss": 0.3398,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3170504570007324,
|
|
"step": 2720,
|
|
"valid_targets_mean": 3623.2,
|
|
"valid_targets_min": 999
|
|
},
|
|
{
|
|
"epoch": 4.409385113268608,
|
|
"grad_norm": 0.7553534225613581,
|
|
"learning_rate": 1.4511218404831208e-05,
|
|
"loss": 0.328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32873186469078064,
|
|
"step": 2725,
|
|
"valid_targets_mean": 3616.4,
|
|
"valid_targets_min": 1483
|
|
},
|
|
{
|
|
"epoch": 4.41747572815534,
|
|
"grad_norm": 0.8279883071817279,
|
|
"learning_rate": 1.4433663254735868e-05,
|
|
"loss": 0.3529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.39501428604125977,
|
|
"step": 2730,
|
|
"valid_targets_mean": 3939.2,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 4.425566343042071,
|
|
"grad_norm": 0.8436452088585356,
|
|
"learning_rate": 1.4356198727932357e-05,
|
|
"loss": 0.3353,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3371664583683014,
|
|
"step": 2735,
|
|
"valid_targets_mean": 3482.7,
|
|
"valid_targets_min": 1309
|
|
},
|
|
{
|
|
"epoch": 4.433656957928803,
|
|
"grad_norm": 0.8425594216816737,
|
|
"learning_rate": 1.427882608558951e-05,
|
|
"loss": 0.3438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3945726752281189,
|
|
"step": 2740,
|
|
"valid_targets_mean": 3203.9,
|
|
"valid_targets_min": 1117
|
|
},
|
|
{
|
|
"epoch": 4.441747572815534,
|
|
"grad_norm": 0.8552168356844453,
|
|
"learning_rate": 1.420154658738023e-05,
|
|
"loss": 0.3254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31872445344924927,
|
|
"step": 2745,
|
|
"valid_targets_mean": 3138.2,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 4.449838187702265,
|
|
"grad_norm": 0.7442574111597251,
|
|
"learning_rate": 1.4124361491460979e-05,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3369321823120117,
|
|
"step": 2750,
|
|
"valid_targets_mean": 3830.7,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.457928802588997,
|
|
"grad_norm": 0.7566819794836184,
|
|
"learning_rate": 1.4047272054451288e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3010716438293457,
|
|
"step": 2755,
|
|
"valid_targets_mean": 3429.1,
|
|
"valid_targets_min": 1021
|
|
},
|
|
{
|
|
"epoch": 4.466019417475728,
|
|
"grad_norm": 0.6907319746727313,
|
|
"learning_rate": 1.397027953141329e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2925443649291992,
|
|
"step": 2760,
|
|
"valid_targets_mean": 3514.2,
|
|
"valid_targets_min": 946
|
|
},
|
|
{
|
|
"epoch": 4.47411003236246,
|
|
"grad_norm": 0.6438477052295435,
|
|
"learning_rate": 1.3893385175831326e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3193925619125366,
|
|
"step": 2765,
|
|
"valid_targets_mean": 3938.4,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 4.482200647249191,
|
|
"grad_norm": 0.6045823119652175,
|
|
"learning_rate": 1.3816590239591502e-05,
|
|
"loss": 0.3221,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3105623126029968,
|
|
"step": 2770,
|
|
"valid_targets_mean": 5108.3,
|
|
"valid_targets_min": 1959
|
|
},
|
|
{
|
|
"epoch": 4.490291262135923,
|
|
"grad_norm": 0.7509833584246006,
|
|
"learning_rate": 1.3739895972961312e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3150097131729126,
|
|
"step": 2775,
|
|
"valid_targets_mean": 3583.8,
|
|
"valid_targets_min": 660
|
|
},
|
|
{
|
|
"epoch": 4.498381877022654,
|
|
"grad_norm": 0.7392542720919617,
|
|
"learning_rate": 1.3663303624569303e-05,
|
|
"loss": 0.3232,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3625081181526184,
|
|
"step": 2780,
|
|
"valid_targets_mean": 3303.5,
|
|
"valid_targets_min": 1571
|
|
},
|
|
{
|
|
"epoch": 4.506472491909385,
|
|
"grad_norm": 0.7497171068842345,
|
|
"learning_rate": 1.3586814441384707e-05,
|
|
"loss": 0.3279,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32502812147140503,
|
|
"step": 2785,
|
|
"valid_targets_mean": 3323.2,
|
|
"valid_targets_min": 1681
|
|
},
|
|
{
|
|
"epoch": 4.514563106796117,
|
|
"grad_norm": 0.631259857417725,
|
|
"learning_rate": 1.3510429668697188e-05,
|
|
"loss": 0.3042,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28541266918182373,
|
|
"step": 2790,
|
|
"valid_targets_mean": 4724.1,
|
|
"valid_targets_min": 1245
|
|
},
|
|
{
|
|
"epoch": 4.522653721682848,
|
|
"grad_norm": 0.6720103718042312,
|
|
"learning_rate": 1.3434150550096534e-05,
|
|
"loss": 0.3051,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3023044764995575,
|
|
"step": 2795,
|
|
"valid_targets_mean": 3885.1,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 4.53074433656958,
|
|
"grad_norm": 0.7672977115433848,
|
|
"learning_rate": 1.3357978327452425e-05,
|
|
"loss": 0.3197,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3622583746910095,
|
|
"step": 2800,
|
|
"valid_targets_mean": 4696.1,
|
|
"valid_targets_min": 1528
|
|
},
|
|
{
|
|
"epoch": 4.538834951456311,
|
|
"grad_norm": 0.70767106431573,
|
|
"learning_rate": 1.3281914240894218e-05,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307115375995636,
|
|
"step": 2805,
|
|
"valid_targets_mean": 4114.5,
|
|
"valid_targets_min": 1384
|
|
},
|
|
{
|
|
"epoch": 4.546925566343042,
|
|
"grad_norm": 0.6165219862728282,
|
|
"learning_rate": 1.320595952879073e-05,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30621129274368286,
|
|
"step": 2810,
|
|
"valid_targets_mean": 4788.4,
|
|
"valid_targets_min": 1569
|
|
},
|
|
{
|
|
"epoch": 4.555016181229774,
|
|
"grad_norm": 0.653761496025403,
|
|
"learning_rate": 1.3130115427730106e-05,
|
|
"loss": 0.2966,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2891233265399933,
|
|
"step": 2815,
|
|
"valid_targets_mean": 3966.3,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 4.563106796116505,
|
|
"grad_norm": 0.7130430735759704,
|
|
"learning_rate": 1.3054383172499688e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2976634204387665,
|
|
"step": 2820,
|
|
"valid_targets_mean": 3847.1,
|
|
"valid_targets_min": 815
|
|
},
|
|
{
|
|
"epoch": 4.5711974110032365,
|
|
"grad_norm": 0.709782236189302,
|
|
"learning_rate": 1.2978763996065888e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3229979872703552,
|
|
"step": 2825,
|
|
"valid_targets_mean": 3528.3,
|
|
"valid_targets_min": 838
|
|
},
|
|
{
|
|
"epoch": 4.5792880258899675,
|
|
"grad_norm": 0.6662440633945301,
|
|
"learning_rate": 1.2903259129554138e-05,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2709886431694031,
|
|
"step": 2830,
|
|
"valid_targets_mean": 3431.5,
|
|
"valid_targets_min": 1564
|
|
},
|
|
{
|
|
"epoch": 4.5873786407766985,
|
|
"grad_norm": 0.7092538828288426,
|
|
"learning_rate": 1.2827869802228816e-05,
|
|
"loss": 0.2963,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2797761559486389,
|
|
"step": 2835,
|
|
"valid_targets_mean": 3251.3,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 4.5954692556634305,
|
|
"grad_norm": 0.7572676051927053,
|
|
"learning_rate": 1.2752597241473273e-05,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3064771294593811,
|
|
"step": 2840,
|
|
"valid_targets_mean": 3475.8,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 4.6035598705501615,
|
|
"grad_norm": 0.7552895417197777,
|
|
"learning_rate": 1.2677442672769832e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2986189126968384,
|
|
"step": 2845,
|
|
"valid_targets_mean": 3176.6,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 4.611650485436893,
|
|
"grad_norm": 1.5698337614598337,
|
|
"learning_rate": 1.2602407319679822e-05,
|
|
"loss": 0.3024,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3134298622608185,
|
|
"step": 2850,
|
|
"valid_targets_mean": 3906.7,
|
|
"valid_targets_min": 1499
|
|
},
|
|
{
|
|
"epoch": 4.619741100323624,
|
|
"grad_norm": 0.8306746135033737,
|
|
"learning_rate": 1.2527492403823664e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3346129059791565,
|
|
"step": 2855,
|
|
"valid_targets_mean": 3102.3,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 4.627831715210356,
|
|
"grad_norm": 0.7384107704279723,
|
|
"learning_rate": 1.2452699144861012e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3285127878189087,
|
|
"step": 2860,
|
|
"valid_targets_mean": 3321.5,
|
|
"valid_targets_min": 1397
|
|
},
|
|
{
|
|
"epoch": 4.635922330097087,
|
|
"grad_norm": 0.7333343742356498,
|
|
"learning_rate": 1.237802876047086e-05,
|
|
"loss": 0.3056,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2842520773410797,
|
|
"step": 2865,
|
|
"valid_targets_mean": 3407.2,
|
|
"valid_targets_min": 1416
|
|
},
|
|
{
|
|
"epoch": 4.644012944983819,
|
|
"grad_norm": 0.684739860910513,
|
|
"learning_rate": 1.2303482466331727e-05,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27704834938049316,
|
|
"step": 2870,
|
|
"valid_targets_mean": 4077.6,
|
|
"valid_targets_min": 1619
|
|
},
|
|
{
|
|
"epoch": 4.65210355987055,
|
|
"grad_norm": 0.774294316328799,
|
|
"learning_rate": 1.2229061476101883e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31210005283355713,
|
|
"step": 2875,
|
|
"valid_targets_mean": 3397.1,
|
|
"valid_targets_min": 1294
|
|
},
|
|
{
|
|
"epoch": 4.660194174757281,
|
|
"grad_norm": 0.5794806170925674,
|
|
"learning_rate": 1.2154767001399551e-05,
|
|
"loss": 0.2817,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2938528060913086,
|
|
"step": 2880,
|
|
"valid_targets_mean": 5222.0,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 4.668284789644013,
|
|
"grad_norm": 0.5590770240692666,
|
|
"learning_rate": 1.2080600251783233e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2748606503009796,
|
|
"step": 2885,
|
|
"valid_targets_mean": 4730.9,
|
|
"valid_targets_min": 1312
|
|
},
|
|
{
|
|
"epoch": 4.676375404530744,
|
|
"grad_norm": 0.6572005056341561,
|
|
"learning_rate": 1.2006562434731968e-05,
|
|
"loss": 0.2809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2911245822906494,
|
|
"step": 2890,
|
|
"valid_targets_mean": 3836.6,
|
|
"valid_targets_min": 1517
|
|
},
|
|
{
|
|
"epoch": 4.684466019417476,
|
|
"grad_norm": 0.6985233617878603,
|
|
"learning_rate": 1.193265475562571e-05,
|
|
"loss": 0.2981,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.325228214263916,
|
|
"step": 2895,
|
|
"valid_targets_mean": 4618.6,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 4.692556634304207,
|
|
"grad_norm": 0.874595786129551,
|
|
"learning_rate": 1.1858878417725683e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28530287742614746,
|
|
"step": 2900,
|
|
"valid_targets_mean": 4125.9,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 4.700647249190938,
|
|
"grad_norm": 0.6429408856747463,
|
|
"learning_rate": 1.1785234622154797e-05,
|
|
"loss": 0.2957,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.262306272983551,
|
|
"step": 2905,
|
|
"valid_targets_mean": 3722.9,
|
|
"valid_targets_min": 1407
|
|
},
|
|
{
|
|
"epoch": 4.70873786407767,
|
|
"grad_norm": 0.8608469799248508,
|
|
"learning_rate": 1.1711724567878095e-05,
|
|
"loss": 0.2979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2957187592983246,
|
|
"step": 2910,
|
|
"valid_targets_mean": 3904.0,
|
|
"valid_targets_min": 1895
|
|
},
|
|
{
|
|
"epoch": 4.716828478964401,
|
|
"grad_norm": 0.7159209130807689,
|
|
"learning_rate": 1.1638349451683237e-05,
|
|
"loss": 0.2712,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2613191604614258,
|
|
"step": 2915,
|
|
"valid_targets_mean": 3557.5,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 4.724919093851133,
|
|
"grad_norm": 0.7446038352770069,
|
|
"learning_rate": 1.1565110468160996e-05,
|
|
"loss": 0.2959,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3005721867084503,
|
|
"step": 2920,
|
|
"valid_targets_mean": 3607.1,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 4.733009708737864,
|
|
"grad_norm": 0.7521397453688494,
|
|
"learning_rate": 1.1492008809685856e-05,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2908332347869873,
|
|
"step": 2925,
|
|
"valid_targets_mean": 3941.3,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 4.741100323624595,
|
|
"grad_norm": 0.8564547437417241,
|
|
"learning_rate": 1.141904566639652e-05,
|
|
"loss": 0.2934,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27682605385780334,
|
|
"step": 2930,
|
|
"valid_targets_mean": 2807.7,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 4.749190938511327,
|
|
"grad_norm": 0.5946145119748939,
|
|
"learning_rate": 1.1346222226176606e-05,
|
|
"loss": 0.2838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2541586458683014,
|
|
"step": 2935,
|
|
"valid_targets_mean": 5157.0,
|
|
"valid_targets_min": 1680
|
|
},
|
|
{
|
|
"epoch": 4.757281553398058,
|
|
"grad_norm": 0.532503556021208,
|
|
"learning_rate": 1.1273539674635296e-05,
|
|
"loss": 0.2004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22031345963478088,
|
|
"step": 2940,
|
|
"valid_targets_mean": 5799.4,
|
|
"valid_targets_min": 2379
|
|
},
|
|
{
|
|
"epoch": 4.76537216828479,
|
|
"grad_norm": 0.4872712728960179,
|
|
"learning_rate": 1.1200999195088e-05,
|
|
"loss": 0.1858,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1671465039253235,
|
|
"step": 2945,
|
|
"valid_targets_mean": 6073.6,
|
|
"valid_targets_min": 950
|
|
},
|
|
{
|
|
"epoch": 4.773462783171521,
|
|
"grad_norm": 0.51692554376425,
|
|
"learning_rate": 1.1128601968537111e-05,
|
|
"loss": 0.2031,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19749924540519714,
|
|
"step": 2950,
|
|
"valid_targets_mean": 5545.0,
|
|
"valid_targets_min": 2475
|
|
},
|
|
{
|
|
"epoch": 4.781553398058253,
|
|
"grad_norm": 0.654761675129204,
|
|
"learning_rate": 1.1056349173652791e-05,
|
|
"loss": 0.1983,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26196080446243286,
|
|
"step": 2955,
|
|
"valid_targets_mean": 6686.8,
|
|
"valid_targets_min": 3124
|
|
},
|
|
{
|
|
"epoch": 4.789644012944984,
|
|
"grad_norm": 0.516926906571057,
|
|
"learning_rate": 1.098424198675375e-05,
|
|
"loss": 0.1785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16444584727287292,
|
|
"step": 2960,
|
|
"valid_targets_mean": 4961.1,
|
|
"valid_targets_min": 587
|
|
},
|
|
{
|
|
"epoch": 4.797734627831716,
|
|
"grad_norm": 0.47172896644965434,
|
|
"learning_rate": 1.0912281581788138e-05,
|
|
"loss": 0.1764,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16139261424541473,
|
|
"step": 2965,
|
|
"valid_targets_mean": 6079.8,
|
|
"valid_targets_min": 1883
|
|
},
|
|
{
|
|
"epoch": 4.805825242718447,
|
|
"grad_norm": 0.5379469984334276,
|
|
"learning_rate": 1.0840469130314382e-05,
|
|
"loss": 0.1885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18077893555164337,
|
|
"step": 2970,
|
|
"valid_targets_mean": 4933.7,
|
|
"valid_targets_min": 502
|
|
},
|
|
{
|
|
"epoch": 4.813915857605178,
|
|
"grad_norm": 0.5230760700961994,
|
|
"learning_rate": 1.0768805801482151e-05,
|
|
"loss": 0.1802,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18261444568634033,
|
|
"step": 2975,
|
|
"valid_targets_mean": 5915.8,
|
|
"valid_targets_min": 3455
|
|
},
|
|
{
|
|
"epoch": 4.82200647249191,
|
|
"grad_norm": 0.4689333862630784,
|
|
"learning_rate": 1.0697292762013304e-05,
|
|
"loss": 0.1958,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18166537582874298,
|
|
"step": 2980,
|
|
"valid_targets_mean": 6650.1,
|
|
"valid_targets_min": 2561
|
|
},
|
|
{
|
|
"epoch": 4.830097087378641,
|
|
"grad_norm": 0.49323440105643984,
|
|
"learning_rate": 1.0625931176182905e-05,
|
|
"loss": 0.1871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20641836524009705,
|
|
"step": 2985,
|
|
"valid_targets_mean": 6220.2,
|
|
"valid_targets_min": 2745
|
|
},
|
|
{
|
|
"epoch": 4.8381877022653725,
|
|
"grad_norm": 0.42657327738024303,
|
|
"learning_rate": 1.0554722205800245e-05,
|
|
"loss": 0.161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13979557156562805,
|
|
"step": 2990,
|
|
"valid_targets_mean": 6391.6,
|
|
"valid_targets_min": 2620
|
|
},
|
|
{
|
|
"epoch": 4.8462783171521036,
|
|
"grad_norm": 0.5850989160038047,
|
|
"learning_rate": 1.0483667010189973e-05,
|
|
"loss": 0.1896,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16110314428806305,
|
|
"step": 2995,
|
|
"valid_targets_mean": 5823.4,
|
|
"valid_targets_min": 2335
|
|
},
|
|
{
|
|
"epoch": 4.854368932038835,
|
|
"grad_norm": 0.4719018359213774,
|
|
"learning_rate": 1.0412766746173168e-05,
|
|
"loss": 0.2088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19897709786891937,
|
|
"step": 3000,
|
|
"valid_targets_mean": 6701.3,
|
|
"valid_targets_min": 3184
|
|
},
|
|
{
|
|
"epoch": 4.8624595469255665,
|
|
"grad_norm": 0.4917541720726889,
|
|
"learning_rate": 1.0342022568048543e-05,
|
|
"loss": 0.1911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20104116201400757,
|
|
"step": 3005,
|
|
"valid_targets_mean": 5738.4,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 4.8705501618122975,
|
|
"grad_norm": 0.554594476082386,
|
|
"learning_rate": 1.0271435627573631e-05,
|
|
"loss": 0.2127,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21018058061599731,
|
|
"step": 3010,
|
|
"valid_targets_mean": 5771.9,
|
|
"valid_targets_min": 353
|
|
},
|
|
{
|
|
"epoch": 4.878640776699029,
|
|
"grad_norm": 0.458194799353725,
|
|
"learning_rate": 1.0201007073946041e-05,
|
|
"loss": 0.1745,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18960902094841003,
|
|
"step": 3015,
|
|
"valid_targets_mean": 6883.7,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 4.88673139158576,
|
|
"grad_norm": 0.4901772432602992,
|
|
"learning_rate": 1.0130738053784768e-05,
|
|
"loss": 0.2035,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20777735114097595,
|
|
"step": 3020,
|
|
"valid_targets_mean": 5864.7,
|
|
"valid_targets_min": 922
|
|
},
|
|
{
|
|
"epoch": 4.894822006472491,
|
|
"grad_norm": 0.4858932378674697,
|
|
"learning_rate": 1.0060629711111494e-05,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21894928812980652,
|
|
"step": 3025,
|
|
"valid_targets_mean": 6462.8,
|
|
"valid_targets_min": 3448
|
|
},
|
|
{
|
|
"epoch": 4.902912621359223,
|
|
"grad_norm": 0.505821195385407,
|
|
"learning_rate": 9.99068318733195e-06,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21098601818084717,
|
|
"step": 3030,
|
|
"valid_targets_mean": 6119.8,
|
|
"valid_targets_min": 351
|
|
},
|
|
{
|
|
"epoch": 4.911003236245954,
|
|
"grad_norm": 0.48153855254257244,
|
|
"learning_rate": 9.92089962121741e-06,
|
|
"loss": 0.1844,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22040508687496185,
|
|
"step": 3035,
|
|
"valid_targets_mean": 5484.6,
|
|
"valid_targets_min": 484
|
|
},
|
|
{
|
|
"epoch": 4.919093851132686,
|
|
"grad_norm": 0.49628525274695584,
|
|
"learning_rate": 9.851280148886061e-06,
|
|
"loss": 0.1837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15337306261062622,
|
|
"step": 3040,
|
|
"valid_targets_mean": 6356.4,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 4.927184466019417,
|
|
"grad_norm": 0.4569887725574397,
|
|
"learning_rate": 9.78182590378455e-06,
|
|
"loss": 0.1705,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1723887175321579,
|
|
"step": 3045,
|
|
"valid_targets_mean": 7204.9,
|
|
"valid_targets_min": 2930
|
|
},
|
|
{
|
|
"epoch": 4.935275080906149,
|
|
"grad_norm": 0.5296992143508796,
|
|
"learning_rate": 9.712538016669557e-06,
|
|
"loss": 0.1973,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18016985058784485,
|
|
"step": 3050,
|
|
"valid_targets_mean": 4959.0,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 4.94336569579288,
|
|
"grad_norm": 0.46605102810031673,
|
|
"learning_rate": 9.643417615589299e-06,
|
|
"loss": 0.1702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13274593651294708,
|
|
"step": 3055,
|
|
"valid_targets_mean": 5758.4,
|
|
"valid_targets_min": 3177
|
|
},
|
|
{
|
|
"epoch": 4.951456310679612,
|
|
"grad_norm": 0.4714801915792423,
|
|
"learning_rate": 9.574465825865276e-06,
|
|
"loss": 0.1797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20519624650478363,
|
|
"step": 3060,
|
|
"valid_targets_mean": 6073.0,
|
|
"valid_targets_min": 2860
|
|
},
|
|
{
|
|
"epoch": 4.959546925566343,
|
|
"grad_norm": 0.45620286660464315,
|
|
"learning_rate": 9.50568377007386e-06,
|
|
"loss": 0.1813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15038716793060303,
|
|
"step": 3065,
|
|
"valid_targets_mean": 5933.8,
|
|
"valid_targets_min": 3933
|
|
},
|
|
{
|
|
"epoch": 4.967637540453074,
|
|
"grad_norm": 0.4825590764718128,
|
|
"learning_rate": 9.43707256802806e-06,
|
|
"loss": 0.203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.191915825009346,
|
|
"step": 3070,
|
|
"valid_targets_mean": 6224.9,
|
|
"valid_targets_min": 418
|
|
},
|
|
{
|
|
"epoch": 4.975728155339806,
|
|
"grad_norm": 0.49435258210955274,
|
|
"learning_rate": 9.368633336759292e-06,
|
|
"loss": 0.2068,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2108960747718811,
|
|
"step": 3075,
|
|
"valid_targets_mean": 6257.8,
|
|
"valid_targets_min": 2511
|
|
},
|
|
{
|
|
"epoch": 4.983818770226537,
|
|
"grad_norm": 0.5386278923623121,
|
|
"learning_rate": 9.300367190499178e-06,
|
|
"loss": 0.1952,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2313598096370697,
|
|
"step": 3080,
|
|
"valid_targets_mean": 5614.5,
|
|
"valid_targets_min": 4016
|
|
},
|
|
{
|
|
"epoch": 4.991909385113269,
|
|
"grad_norm": 0.43193135908684027,
|
|
"learning_rate": 9.232275240661403e-06,
|
|
"loss": 0.1901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2286348193883896,
|
|
"step": 3085,
|
|
"valid_targets_mean": 7461.4,
|
|
"valid_targets_min": 3296
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.5212733592125819,
|
|
"learning_rate": 9.164358595823661e-06,
|
|
"loss": 0.1827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21457836031913757,
|
|
"step": 3090,
|
|
"valid_targets_mean": 6528.7,
|
|
"valid_targets_min": 1969
|
|
},
|
|
{
|
|
"epoch": 5.008090614886731,
|
|
"grad_norm": 0.5319495569626199,
|
|
"learning_rate": 9.096618361709545e-06,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3067016005516052,
|
|
"step": 3095,
|
|
"valid_targets_mean": 7367.1,
|
|
"valid_targets_min": 1791
|
|
},
|
|
{
|
|
"epoch": 5.016181229773463,
|
|
"grad_norm": 0.5211577495479047,
|
|
"learning_rate": 9.029055641170588e-06,
|
|
"loss": 0.2771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768145203590393,
|
|
"step": 3100,
|
|
"valid_targets_mean": 8089.4,
|
|
"valid_targets_min": 5145
|
|
},
|
|
{
|
|
"epoch": 5.024271844660194,
|
|
"grad_norm": 0.5295823335577148,
|
|
"learning_rate": 8.961671534168292e-06,
|
|
"loss": 0.2793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2725014388561249,
|
|
"step": 3105,
|
|
"valid_targets_mean": 7736.6,
|
|
"valid_targets_min": 2978
|
|
},
|
|
{
|
|
"epoch": 5.032362459546926,
|
|
"grad_norm": 0.7263128119198212,
|
|
"learning_rate": 8.894467137756228e-06,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3000566363334656,
|
|
"step": 3110,
|
|
"valid_targets_mean": 7745.0,
|
|
"valid_targets_min": 2988
|
|
},
|
|
{
|
|
"epoch": 5.040453074433657,
|
|
"grad_norm": 0.45728545699209733,
|
|
"learning_rate": 8.827443546062165e-06,
|
|
"loss": 0.2602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2903977036476135,
|
|
"step": 3115,
|
|
"valid_targets_mean": 7508.9,
|
|
"valid_targets_min": 3292
|
|
},
|
|
{
|
|
"epoch": 5.048543689320389,
|
|
"grad_norm": 0.46957717103053953,
|
|
"learning_rate": 8.760601850270277e-06,
|
|
"loss": 0.29,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30360230803489685,
|
|
"step": 3120,
|
|
"valid_targets_mean": 7365.6,
|
|
"valid_targets_min": 3274
|
|
},
|
|
{
|
|
"epoch": 5.05663430420712,
|
|
"grad_norm": 0.5000349877363641,
|
|
"learning_rate": 8.69394313860335e-06,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3295597732067108,
|
|
"step": 3125,
|
|
"valid_targets_mean": 7283.9,
|
|
"valid_targets_min": 2285
|
|
},
|
|
{
|
|
"epoch": 5.064724919093851,
|
|
"grad_norm": 0.4761118410851209,
|
|
"learning_rate": 8.62746849630508e-06,
|
|
"loss": 0.2668,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27158045768737793,
|
|
"step": 3130,
|
|
"valid_targets_mean": 7289.2,
|
|
"valid_targets_min": 2822
|
|
},
|
|
{
|
|
"epoch": 5.072815533980583,
|
|
"grad_norm": 0.45387036145946663,
|
|
"learning_rate": 8.561179005622411e-06,
|
|
"loss": 0.2436,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25855568051338196,
|
|
"step": 3135,
|
|
"valid_targets_mean": 7387.5,
|
|
"valid_targets_min": 3823
|
|
},
|
|
{
|
|
"epoch": 5.080906148867314,
|
|
"grad_norm": 0.5514967218526795,
|
|
"learning_rate": 8.495075745787895e-06,
|
|
"loss": 0.2666,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2519291341304779,
|
|
"step": 3140,
|
|
"valid_targets_mean": 6462.8,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 5.088996763754046,
|
|
"grad_norm": 0.48360092752777134,
|
|
"learning_rate": 8.429159793002164e-06,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31536170840263367,
|
|
"step": 3145,
|
|
"valid_targets_mean": 9115.1,
|
|
"valid_targets_min": 5927
|
|
},
|
|
{
|
|
"epoch": 5.097087378640777,
|
|
"grad_norm": 0.506081856785789,
|
|
"learning_rate": 8.363432220416336e-06,
|
|
"loss": 0.2914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3049050569534302,
|
|
"step": 3150,
|
|
"valid_targets_mean": 7797.2,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 5.105177993527508,
|
|
"grad_norm": 0.5176036211803096,
|
|
"learning_rate": 8.297894098114612e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24461714923381805,
|
|
"step": 3155,
|
|
"valid_targets_mean": 7330.4,
|
|
"valid_targets_min": 2779
|
|
},
|
|
{
|
|
"epoch": 5.11326860841424,
|
|
"grad_norm": 0.45319450230478064,
|
|
"learning_rate": 8.232546493096836e-06,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2451840341091156,
|
|
"step": 3160,
|
|
"valid_targets_mean": 7086.4,
|
|
"valid_targets_min": 2870
|
|
},
|
|
{
|
|
"epoch": 5.121359223300971,
|
|
"grad_norm": 0.5013359160427914,
|
|
"learning_rate": 8.167390469261105e-06,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31205499172210693,
|
|
"step": 3165,
|
|
"valid_targets_mean": 8391.3,
|
|
"valid_targets_min": 880
|
|
},
|
|
{
|
|
"epoch": 5.1294498381877025,
|
|
"grad_norm": 0.48835871990641466,
|
|
"learning_rate": 8.102427087386457e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24966265261173248,
|
|
"step": 3170,
|
|
"valid_targets_mean": 6497.2,
|
|
"valid_targets_min": 3127
|
|
},
|
|
{
|
|
"epoch": 5.1375404530744335,
|
|
"grad_norm": 0.47289910533912854,
|
|
"learning_rate": 8.037657405115611e-06,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2476363182067871,
|
|
"step": 3175,
|
|
"valid_targets_mean": 6541.8,
|
|
"valid_targets_min": 3567
|
|
},
|
|
{
|
|
"epoch": 5.145631067961165,
|
|
"grad_norm": 0.47539629536114814,
|
|
"learning_rate": 7.973082476937728e-06,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2635002136230469,
|
|
"step": 3180,
|
|
"valid_targets_mean": 7617.8,
|
|
"valid_targets_min": 3885
|
|
},
|
|
{
|
|
"epoch": 5.1537216828478964,
|
|
"grad_norm": 0.5585549485849161,
|
|
"learning_rate": 7.908703354171283e-06,
|
|
"loss": 0.2897,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30579209327697754,
|
|
"step": 3185,
|
|
"valid_targets_mean": 5710.0,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 5.1618122977346275,
|
|
"grad_norm": 0.44686022386718305,
|
|
"learning_rate": 7.844521084946895e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1834941804409027,
|
|
"step": 3190,
|
|
"valid_targets_mean": 6085.1,
|
|
"valid_targets_min": 3073
|
|
},
|
|
{
|
|
"epoch": 5.169902912621359,
|
|
"grad_norm": 0.4676860962444573,
|
|
"learning_rate": 7.780536714190298e-06,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26917046308517456,
|
|
"step": 3195,
|
|
"valid_targets_mean": 7739.5,
|
|
"valid_targets_min": 3524
|
|
},
|
|
{
|
|
"epoch": 5.17799352750809,
|
|
"grad_norm": 0.4986507281328184,
|
|
"learning_rate": 7.716751283605324e-06,
|
|
"loss": 0.2788,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29654639959335327,
|
|
"step": 3200,
|
|
"valid_targets_mean": 7832.8,
|
|
"valid_targets_min": 2404
|
|
},
|
|
{
|
|
"epoch": 5.186084142394822,
|
|
"grad_norm": 0.47522046433557813,
|
|
"learning_rate": 7.653165831656937e-06,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2719336450099945,
|
|
"step": 3205,
|
|
"valid_targets_mean": 6810.3,
|
|
"valid_targets_min": 3979
|
|
},
|
|
{
|
|
"epoch": 5.194174757281553,
|
|
"grad_norm": 0.4476741173784591,
|
|
"learning_rate": 7.589781393554321e-06,
|
|
"loss": 0.2614,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2759358584880829,
|
|
"step": 3210,
|
|
"valid_targets_mean": 7624.2,
|
|
"valid_targets_min": 3905
|
|
},
|
|
{
|
|
"epoch": 5.202265372168285,
|
|
"grad_norm": 0.5411741804025072,
|
|
"learning_rate": 7.526599001234058e-06,
|
|
"loss": 0.2776,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2656782269477844,
|
|
"step": 3215,
|
|
"valid_targets_mean": 6795.1,
|
|
"valid_targets_min": 3148
|
|
},
|
|
{
|
|
"epoch": 5.210355987055016,
|
|
"grad_norm": 0.8063416501663134,
|
|
"learning_rate": 7.463619683343284e-06,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3292917013168335,
|
|
"step": 3220,
|
|
"valid_targets_mean": 3099.5,
|
|
"valid_targets_min": 1256
|
|
},
|
|
{
|
|
"epoch": 5.218446601941747,
|
|
"grad_norm": 0.7523254899644706,
|
|
"learning_rate": 7.400844465222963e-06,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3488837480545044,
|
|
"step": 3225,
|
|
"valid_targets_mean": 3709.8,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 5.226537216828479,
|
|
"grad_norm": 0.8129960141626508,
|
|
"learning_rate": 7.338274368891198e-06,
|
|
"loss": 0.3212,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29325389862060547,
|
|
"step": 3230,
|
|
"valid_targets_mean": 2654.6,
|
|
"valid_targets_min": 1082
|
|
},
|
|
{
|
|
"epoch": 5.23462783171521,
|
|
"grad_norm": 0.84891707484108,
|
|
"learning_rate": 7.275910413026579e-06,
|
|
"loss": 0.3384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32466965913772583,
|
|
"step": 3235,
|
|
"valid_targets_mean": 2991.8,
|
|
"valid_targets_min": 1159
|
|
},
|
|
{
|
|
"epoch": 5.242718446601942,
|
|
"grad_norm": 0.8728926495709631,
|
|
"learning_rate": 7.213753612951624e-06,
|
|
"loss": 0.3494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38176876306533813,
|
|
"step": 3240,
|
|
"valid_targets_mean": 3261.2,
|
|
"valid_targets_min": 1281
|
|
},
|
|
{
|
|
"epoch": 5.250809061488673,
|
|
"grad_norm": 0.8433417368439166,
|
|
"learning_rate": 7.1518049806162196e-06,
|
|
"loss": 0.338,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35416272282600403,
|
|
"step": 3245,
|
|
"valid_targets_mean": 3676.6,
|
|
"valid_targets_min": 1603
|
|
},
|
|
{
|
|
"epoch": 5.258899676375404,
|
|
"grad_norm": 0.8489581240200976,
|
|
"learning_rate": 7.090065524581136e-06,
|
|
"loss": 0.3485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3246349096298218,
|
|
"step": 3250,
|
|
"valid_targets_mean": 3011.1,
|
|
"valid_targets_min": 1422
|
|
},
|
|
{
|
|
"epoch": 5.266990291262136,
|
|
"grad_norm": 0.8018879913146477,
|
|
"learning_rate": 7.0285362500016675e-06,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29004016518592834,
|
|
"step": 3255,
|
|
"valid_targets_mean": 3071.8,
|
|
"valid_targets_min": 1277
|
|
},
|
|
{
|
|
"epoch": 5.275080906148867,
|
|
"grad_norm": 0.8444531679837199,
|
|
"learning_rate": 6.967218158611202e-06,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3274296522140503,
|
|
"step": 3260,
|
|
"valid_targets_mean": 3090.8,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 5.283171521035599,
|
|
"grad_norm": 1.4990565330268864,
|
|
"learning_rate": 6.906112248704939e-06,
|
|
"loss": 0.3458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3334260582923889,
|
|
"step": 3265,
|
|
"valid_targets_mean": 3369.2,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 5.29126213592233,
|
|
"grad_norm": 0.8525857578497839,
|
|
"learning_rate": 6.845219515123667e-06,
|
|
"loss": 0.3356,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38606202602386475,
|
|
"step": 3270,
|
|
"valid_targets_mean": 3657.6,
|
|
"valid_targets_min": 1556
|
|
},
|
|
{
|
|
"epoch": 5.299352750809062,
|
|
"grad_norm": 1.0945788176494664,
|
|
"learning_rate": 6.784540949237484e-06,
|
|
"loss": 0.3261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3346405625343323,
|
|
"step": 3275,
|
|
"valid_targets_mean": 2946.7,
|
|
"valid_targets_min": 860
|
|
},
|
|
{
|
|
"epoch": 5.307443365695793,
|
|
"grad_norm": 0.7396805936736428,
|
|
"learning_rate": 6.724077538929759e-06,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3220456838607788,
|
|
"step": 3280,
|
|
"valid_targets_mean": 3523.2,
|
|
"valid_targets_min": 1404
|
|
},
|
|
{
|
|
"epoch": 5.315533980582524,
|
|
"grad_norm": 0.6013604910785784,
|
|
"learning_rate": 6.663830268580971e-06,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2951660752296448,
|
|
"step": 3285,
|
|
"valid_targets_mean": 5521.4,
|
|
"valid_targets_min": 1360
|
|
},
|
|
{
|
|
"epoch": 5.323624595469256,
|
|
"grad_norm": 0.8298768134992649,
|
|
"learning_rate": 6.6038001190527146e-06,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3154316246509552,
|
|
"step": 3290,
|
|
"valid_targets_mean": 3396.9,
|
|
"valid_targets_min": 1336
|
|
},
|
|
{
|
|
"epoch": 5.331715210355987,
|
|
"grad_norm": 0.738625989323921,
|
|
"learning_rate": 6.543988067671752e-06,
|
|
"loss": 0.319,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32287925481796265,
|
|
"step": 3295,
|
|
"valid_targets_mean": 3617.4,
|
|
"valid_targets_min": 1726
|
|
},
|
|
{
|
|
"epoch": 5.339805825242719,
|
|
"grad_norm": 0.7679103475244519,
|
|
"learning_rate": 6.484395088214037e-06,
|
|
"loss": 0.3181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35048407316207886,
|
|
"step": 3300,
|
|
"valid_targets_mean": 4753.2,
|
|
"valid_targets_min": 1146
|
|
},
|
|
{
|
|
"epoch": 5.34789644012945,
|
|
"grad_norm": 0.7895088467002501,
|
|
"learning_rate": 6.425022150888924e-06,
|
|
"loss": 0.3117,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2957543730735779,
|
|
"step": 3305,
|
|
"valid_targets_mean": 2988.0,
|
|
"valid_targets_min": 1665
|
|
},
|
|
{
|
|
"epoch": 5.355987055016181,
|
|
"grad_norm": 0.8257566232159592,
|
|
"learning_rate": 6.36587022232336e-06,
|
|
"loss": 0.3257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3673299551010132,
|
|
"step": 3310,
|
|
"valid_targets_mean": 3400.9,
|
|
"valid_targets_min": 985
|
|
},
|
|
{
|
|
"epoch": 5.364077669902913,
|
|
"grad_norm": 0.7185396884990038,
|
|
"learning_rate": 6.306940265546117e-06,
|
|
"loss": 0.3203,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3062174320220947,
|
|
"step": 3315,
|
|
"valid_targets_mean": 3745.3,
|
|
"valid_targets_min": 1398
|
|
},
|
|
{
|
|
"epoch": 5.372168284789644,
|
|
"grad_norm": 0.733095396914552,
|
|
"learning_rate": 6.248233239972144e-06,
|
|
"loss": 0.3116,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2923792600631714,
|
|
"step": 3320,
|
|
"valid_targets_mean": 3360.9,
|
|
"valid_targets_min": 1133
|
|
},
|
|
{
|
|
"epoch": 5.380258899676376,
|
|
"grad_norm": 0.8773111466520589,
|
|
"learning_rate": 6.189750101386931e-06,
|
|
"loss": 0.3291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.35776978731155396,
|
|
"step": 3325,
|
|
"valid_targets_mean": 3318.2,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 5.388349514563107,
|
|
"grad_norm": 0.8173794193078433,
|
|
"learning_rate": 6.1314918019309535e-06,
|
|
"loss": 0.333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33896470069885254,
|
|
"step": 3330,
|
|
"valid_targets_mean": 3564.1,
|
|
"valid_targets_min": 1035
|
|
},
|
|
{
|
|
"epoch": 5.3964401294498385,
|
|
"grad_norm": 1.189059892174759,
|
|
"learning_rate": 6.073459290084185e-06,
|
|
"loss": 0.3134,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.303739458322525,
|
|
"step": 3335,
|
|
"valid_targets_mean": 2470.4,
|
|
"valid_targets_min": 1285
|
|
},
|
|
{
|
|
"epoch": 5.4045307443365695,
|
|
"grad_norm": 0.7616019973262189,
|
|
"learning_rate": 6.01565351065063e-06,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26032713055610657,
|
|
"step": 3340,
|
|
"valid_targets_mean": 3419.8,
|
|
"valid_targets_min": 1415
|
|
},
|
|
{
|
|
"epoch": 5.412621359223301,
|
|
"grad_norm": 0.6846961329277493,
|
|
"learning_rate": 5.958075404742951e-06,
|
|
"loss": 0.3137,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30751484632492065,
|
|
"step": 3345,
|
|
"valid_targets_mean": 3587.9,
|
|
"valid_targets_min": 1375
|
|
},
|
|
{
|
|
"epoch": 5.4207119741100325,
|
|
"grad_norm": 0.8653748694772552,
|
|
"learning_rate": 5.900725909767155e-06,
|
|
"loss": 0.3355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2950606048107147,
|
|
"step": 3350,
|
|
"valid_targets_mean": 2614.4,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 5.4288025889967635,
|
|
"grad_norm": 0.7715947516706176,
|
|
"learning_rate": 5.843605959407326e-06,
|
|
"loss": 0.3092,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31399863958358765,
|
|
"step": 3355,
|
|
"valid_targets_mean": 3381.9,
|
|
"valid_targets_min": 1125
|
|
},
|
|
{
|
|
"epoch": 5.436893203883495,
|
|
"grad_norm": 0.8373987039194564,
|
|
"learning_rate": 5.7867164836104174e-06,
|
|
"loss": 0.3163,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.279275119304657,
|
|
"step": 3360,
|
|
"valid_targets_mean": 3233.6,
|
|
"valid_targets_min": 1199
|
|
},
|
|
{
|
|
"epoch": 5.444983818770226,
|
|
"grad_norm": 0.9106273725610025,
|
|
"learning_rate": 5.730058408571135e-06,
|
|
"loss": 0.3104,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3223729729652405,
|
|
"step": 3365,
|
|
"valid_targets_mean": 3305.2,
|
|
"valid_targets_min": 1344
|
|
},
|
|
{
|
|
"epoch": 5.453074433656958,
|
|
"grad_norm": 0.7994891025143966,
|
|
"learning_rate": 5.673632656716825e-06,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31081345677375793,
|
|
"step": 3370,
|
|
"valid_targets_mean": 3871.6,
|
|
"valid_targets_min": 1205
|
|
},
|
|
{
|
|
"epoch": 5.461165048543689,
|
|
"grad_norm": 0.7624042577791246,
|
|
"learning_rate": 5.617440146692485e-06,
|
|
"loss": 0.3069,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30954042077064514,
|
|
"step": 3375,
|
|
"valid_targets_mean": 3910.4,
|
|
"valid_targets_min": 1450
|
|
},
|
|
{
|
|
"epoch": 5.46925566343042,
|
|
"grad_norm": 0.7095484444373912,
|
|
"learning_rate": 5.561481793345786e-06,
|
|
"loss": 0.2977,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2747786045074463,
|
|
"step": 3380,
|
|
"valid_targets_mean": 4702.8,
|
|
"valid_targets_min": 2246
|
|
},
|
|
{
|
|
"epoch": 5.477346278317152,
|
|
"grad_norm": 0.7902102023863703,
|
|
"learning_rate": 5.505758507712196e-06,
|
|
"loss": 0.3146,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31908634305000305,
|
|
"step": 3385,
|
|
"valid_targets_mean": 3323.4,
|
|
"valid_targets_min": 1348
|
|
},
|
|
{
|
|
"epoch": 5.485436893203883,
|
|
"grad_norm": 0.7074947155569701,
|
|
"learning_rate": 5.450271197000128e-06,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28372061252593994,
|
|
"step": 3390,
|
|
"valid_targets_mean": 3920.9,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 5.493527508090615,
|
|
"grad_norm": 0.6367141439259998,
|
|
"learning_rate": 5.395020764576211e-06,
|
|
"loss": 0.2876,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3219951093196869,
|
|
"step": 3395,
|
|
"valid_targets_mean": 4233.6,
|
|
"valid_targets_min": 1361
|
|
},
|
|
{
|
|
"epoch": 5.501618122977346,
|
|
"grad_norm": 0.7259097102070373,
|
|
"learning_rate": 5.340008109950512e-06,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3297671675682068,
|
|
"step": 3400,
|
|
"valid_targets_mean": 3595.1,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 5.509708737864077,
|
|
"grad_norm": 0.9618474062674968,
|
|
"learning_rate": 5.285234128761969e-06,
|
|
"loss": 0.3086,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30990469455718994,
|
|
"step": 3405,
|
|
"valid_targets_mean": 3354.8,
|
|
"valid_targets_min": 1186
|
|
},
|
|
{
|
|
"epoch": 5.517799352750809,
|
|
"grad_norm": 0.6714184926341713,
|
|
"learning_rate": 5.230699712763758e-06,
|
|
"loss": 0.2823,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863173484802246,
|
|
"step": 3410,
|
|
"valid_targets_mean": 4103.5,
|
|
"valid_targets_min": 1405
|
|
},
|
|
{
|
|
"epoch": 5.52588996763754,
|
|
"grad_norm": 0.8589890875883771,
|
|
"learning_rate": 5.176405749808786e-06,
|
|
"loss": 0.2911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28313857316970825,
|
|
"step": 3415,
|
|
"valid_targets_mean": 4204.2,
|
|
"valid_targets_min": 941
|
|
},
|
|
{
|
|
"epoch": 5.533980582524272,
|
|
"grad_norm": 0.6924780807211832,
|
|
"learning_rate": 5.122353123835262e-06,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.309023916721344,
|
|
"step": 3420,
|
|
"valid_targets_mean": 4473.8,
|
|
"valid_targets_min": 1696
|
|
},
|
|
{
|
|
"epoch": 5.542071197411003,
|
|
"grad_norm": 0.8142961750957524,
|
|
"learning_rate": 5.068542714852254e-06,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.273823082447052,
|
|
"step": 3425,
|
|
"valid_targets_mean": 2837.8,
|
|
"valid_targets_min": 1330
|
|
},
|
|
{
|
|
"epoch": 5.550161812297735,
|
|
"grad_norm": 0.6118772127551925,
|
|
"learning_rate": 5.014975398925408e-06,
|
|
"loss": 0.2901,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26475781202316284,
|
|
"step": 3430,
|
|
"valid_targets_mean": 4635.1,
|
|
"valid_targets_min": 1166
|
|
},
|
|
{
|
|
"epoch": 5.558252427184466,
|
|
"grad_norm": 0.7697161843514183,
|
|
"learning_rate": 4.9616520481626794e-06,
|
|
"loss": 0.2882,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.329695463180542,
|
|
"step": 3435,
|
|
"valid_targets_mean": 3667.0,
|
|
"valid_targets_min": 1395
|
|
},
|
|
{
|
|
"epoch": 5.566343042071198,
|
|
"grad_norm": 0.6704301668195795,
|
|
"learning_rate": 4.908573530700111e-06,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28023281693458557,
|
|
"step": 3440,
|
|
"valid_targets_mean": 4262.1,
|
|
"valid_targets_min": 940
|
|
},
|
|
{
|
|
"epoch": 5.574433656957929,
|
|
"grad_norm": 0.7411191986928855,
|
|
"learning_rate": 4.8557407106877175e-06,
|
|
"loss": 0.3049,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2993595004081726,
|
|
"step": 3445,
|
|
"valid_targets_mean": 3191.4,
|
|
"valid_targets_min": 1688
|
|
},
|
|
{
|
|
"epoch": 5.58252427184466,
|
|
"grad_norm": 0.6780175654020204,
|
|
"learning_rate": 4.8031544482754136e-06,
|
|
"loss": 0.2772,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27136701345443726,
|
|
"step": 3450,
|
|
"valid_targets_mean": 3711.2,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 5.590614886731392,
|
|
"grad_norm": 0.6656730656773023,
|
|
"learning_rate": 4.7508155995989944e-06,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25287604331970215,
|
|
"step": 3455,
|
|
"valid_targets_mean": 4141.0,
|
|
"valid_targets_min": 1576
|
|
},
|
|
{
|
|
"epoch": 5.598705501618123,
|
|
"grad_norm": 0.7430435623053471,
|
|
"learning_rate": 4.6987250167662435e-06,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3740665316581726,
|
|
"step": 3460,
|
|
"valid_targets_mean": 3920.6,
|
|
"valid_targets_min": 993
|
|
},
|
|
{
|
|
"epoch": 5.606796116504855,
|
|
"grad_norm": 0.8089842814454896,
|
|
"learning_rate": 4.6468835478430045e-06,
|
|
"loss": 0.2806,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27529019117355347,
|
|
"step": 3465,
|
|
"valid_targets_mean": 2930.4,
|
|
"valid_targets_min": 1271
|
|
},
|
|
{
|
|
"epoch": 5.614886731391586,
|
|
"grad_norm": 0.7119442724378142,
|
|
"learning_rate": 4.595292036839383e-06,
|
|
"loss": 0.3004,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3111814856529236,
|
|
"step": 3470,
|
|
"valid_targets_mean": 3889.4,
|
|
"valid_targets_min": 1109
|
|
},
|
|
{
|
|
"epoch": 5.622977346278317,
|
|
"grad_norm": 0.6843256595943354,
|
|
"learning_rate": 4.543951323696058e-06,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2997050881385803,
|
|
"step": 3475,
|
|
"valid_targets_mean": 4169.0,
|
|
"valid_targets_min": 1262
|
|
},
|
|
{
|
|
"epoch": 5.631067961165049,
|
|
"grad_norm": 0.9105040696365415,
|
|
"learning_rate": 4.492862244270544e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28833556175231934,
|
|
"step": 3480,
|
|
"valid_targets_mean": 2340.9,
|
|
"valid_targets_min": 1380
|
|
},
|
|
{
|
|
"epoch": 5.63915857605178,
|
|
"grad_norm": 0.8519710587688972,
|
|
"learning_rate": 4.442025630323607e-06,
|
|
"loss": 0.2839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2577669322490692,
|
|
"step": 3485,
|
|
"valid_targets_mean": 3383.8,
|
|
"valid_targets_min": 1136
|
|
},
|
|
{
|
|
"epoch": 5.647249190938512,
|
|
"grad_norm": 0.6327989909826687,
|
|
"learning_rate": 4.3914423095057516e-06,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2794429659843445,
|
|
"step": 3490,
|
|
"valid_targets_mean": 4683.7,
|
|
"valid_targets_min": 1129
|
|
},
|
|
{
|
|
"epoch": 5.655339805825243,
|
|
"grad_norm": 0.7673156000554755,
|
|
"learning_rate": 4.341113105343673e-06,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29908567667007446,
|
|
"step": 3495,
|
|
"valid_targets_mean": 3994.8,
|
|
"valid_targets_min": 1930
|
|
},
|
|
{
|
|
"epoch": 5.663430420711974,
|
|
"grad_norm": 0.6723541515246171,
|
|
"learning_rate": 4.291038837226935e-06,
|
|
"loss": 0.2665,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26810508966445923,
|
|
"step": 3500,
|
|
"valid_targets_mean": 3837.7,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 5.671521035598706,
|
|
"grad_norm": 0.9407028763902802,
|
|
"learning_rate": 4.241220320394574e-06,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2722015082836151,
|
|
"step": 3505,
|
|
"valid_targets_mean": 3206.0,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 5.679611650485437,
|
|
"grad_norm": 0.7507137274318326,
|
|
"learning_rate": 4.191658365921838e-06,
|
|
"loss": 0.2687,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24956271052360535,
|
|
"step": 3510,
|
|
"valid_targets_mean": 3158.6,
|
|
"valid_targets_min": 1188
|
|
},
|
|
{
|
|
"epoch": 5.6877022653721685,
|
|
"grad_norm": 0.633710354363695,
|
|
"learning_rate": 4.1423537807070065e-06,
|
|
"loss": 0.2869,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28306400775909424,
|
|
"step": 3515,
|
|
"valid_targets_mean": 4985.4,
|
|
"valid_targets_min": 1162
|
|
},
|
|
{
|
|
"epoch": 5.6957928802588995,
|
|
"grad_norm": 0.7106919845148538,
|
|
"learning_rate": 4.0933073674582054e-06,
|
|
"loss": 0.2942,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28822749853134155,
|
|
"step": 3520,
|
|
"valid_targets_mean": 3605.8,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 5.703883495145631,
|
|
"grad_norm": 0.7709486431215857,
|
|
"learning_rate": 4.044519924680379e-06,
|
|
"loss": 0.2891,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3167271018028259,
|
|
"step": 3525,
|
|
"valid_targets_mean": 3628.4,
|
|
"valid_targets_min": 1204
|
|
},
|
|
{
|
|
"epoch": 5.711974110032362,
|
|
"grad_norm": 0.6752644698813571,
|
|
"learning_rate": 3.99599224666229e-06,
|
|
"loss": 0.2582,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25341057777404785,
|
|
"step": 3530,
|
|
"valid_targets_mean": 4005.9,
|
|
"valid_targets_min": 1431
|
|
},
|
|
{
|
|
"epoch": 5.720064724919094,
|
|
"grad_norm": 0.6562003474426554,
|
|
"learning_rate": 3.947725123463559e-06,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.236685648560524,
|
|
"step": 3535,
|
|
"valid_targets_mean": 3881.7,
|
|
"valid_targets_min": 1211
|
|
},
|
|
{
|
|
"epoch": 5.728155339805825,
|
|
"grad_norm": 0.7556232041762883,
|
|
"learning_rate": 3.8997193409018245e-06,
|
|
"loss": 0.289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27064526081085205,
|
|
"step": 3540,
|
|
"valid_targets_mean": 3740.8,
|
|
"valid_targets_min": 1231
|
|
},
|
|
{
|
|
"epoch": 5.736245954692556,
|
|
"grad_norm": 0.7338747249872632,
|
|
"learning_rate": 3.851975680539941e-06,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25058260560035706,
|
|
"step": 3545,
|
|
"valid_targets_mean": 3616.2,
|
|
"valid_targets_min": 1364
|
|
},
|
|
{
|
|
"epoch": 5.744336569579288,
|
|
"grad_norm": 0.6512233633964314,
|
|
"learning_rate": 3.804494919673254e-06,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25747865438461304,
|
|
"step": 3550,
|
|
"valid_targets_mean": 3856.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 5.752427184466019,
|
|
"grad_norm": 0.5877368860493349,
|
|
"learning_rate": 3.757277831316961e-06,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17853707075119019,
|
|
"step": 3555,
|
|
"valid_targets_mean": 6443.2,
|
|
"valid_targets_min": 2485
|
|
},
|
|
{
|
|
"epoch": 5.760517799352751,
|
|
"grad_norm": 0.5882911139733148,
|
|
"learning_rate": 3.7103251841934993e-06,
|
|
"loss": 0.1935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1725941300392151,
|
|
"step": 3560,
|
|
"valid_targets_mean": 5332.4,
|
|
"valid_targets_min": 824
|
|
},
|
|
{
|
|
"epoch": 5.768608414239482,
|
|
"grad_norm": 0.5213804910886257,
|
|
"learning_rate": 3.663637742720052e-06,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20349499583244324,
|
|
"step": 3565,
|
|
"valid_targets_mean": 6187.7,
|
|
"valid_targets_min": 2207
|
|
},
|
|
{
|
|
"epoch": 5.776699029126213,
|
|
"grad_norm": 0.4967460727403076,
|
|
"learning_rate": 3.617216266996093e-06,
|
|
"loss": 0.1791,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14877915382385254,
|
|
"step": 3570,
|
|
"valid_targets_mean": 5631.6,
|
|
"valid_targets_min": 2795
|
|
},
|
|
{
|
|
"epoch": 5.784789644012945,
|
|
"grad_norm": 0.46520895730300954,
|
|
"learning_rate": 3.571061512791012e-06,
|
|
"loss": 0.2074,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2052791714668274,
|
|
"step": 3575,
|
|
"valid_targets_mean": 7407.4,
|
|
"valid_targets_min": 1902
|
|
},
|
|
{
|
|
"epoch": 5.792880258899676,
|
|
"grad_norm": 0.605749686688732,
|
|
"learning_rate": 3.525174231531814e-06,
|
|
"loss": 0.1533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17778176069259644,
|
|
"step": 3580,
|
|
"valid_targets_mean": 5209.8,
|
|
"valid_targets_min": 2991
|
|
},
|
|
{
|
|
"epoch": 5.800970873786408,
|
|
"grad_norm": 0.5738206053924397,
|
|
"learning_rate": 3.4795551702908935e-06,
|
|
"loss": 0.181,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23292459547519684,
|
|
"step": 3585,
|
|
"valid_targets_mean": 6012.4,
|
|
"valid_targets_min": 3782
|
|
},
|
|
{
|
|
"epoch": 5.809061488673139,
|
|
"grad_norm": 0.6919134158935569,
|
|
"learning_rate": 3.434205071773855e-06,
|
|
"loss": 0.1651,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15268674492835999,
|
|
"step": 3590,
|
|
"valid_targets_mean": 5490.8,
|
|
"valid_targets_min": 3401
|
|
},
|
|
{
|
|
"epoch": 5.81715210355987,
|
|
"grad_norm": 0.525226561251439,
|
|
"learning_rate": 3.3891246743074245e-06,
|
|
"loss": 0.1917,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1895448863506317,
|
|
"step": 3595,
|
|
"valid_targets_mean": 5865.4,
|
|
"valid_targets_min": 4021
|
|
},
|
|
{
|
|
"epoch": 5.825242718446602,
|
|
"grad_norm": 1.6748639613941427,
|
|
"learning_rate": 3.344314711827441e-06,
|
|
"loss": 0.1686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1472940295934677,
|
|
"step": 3600,
|
|
"valid_targets_mean": 5680.2,
|
|
"valid_targets_min": 379
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.45456760472205415,
|
|
"learning_rate": 3.299775913866894e-06,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14308935403823853,
|
|
"step": 3605,
|
|
"valid_targets_mean": 5873.6,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 5.841423948220065,
|
|
"grad_norm": 0.5630454379610851,
|
|
"learning_rate": 3.255509005544062e-06,
|
|
"loss": 0.1661,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21299272775650024,
|
|
"step": 3610,
|
|
"valid_targets_mean": 4735.3,
|
|
"valid_targets_min": 392
|
|
},
|
|
{
|
|
"epoch": 5.849514563106796,
|
|
"grad_norm": 0.5764483473368531,
|
|
"learning_rate": 3.2115147075506957e-06,
|
|
"loss": 0.1878,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1859305202960968,
|
|
"step": 3615,
|
|
"valid_targets_mean": 5192.4,
|
|
"valid_targets_min": 647
|
|
},
|
|
{
|
|
"epoch": 5.857605177993528,
|
|
"grad_norm": 0.5025174908790243,
|
|
"learning_rate": 3.1677937361402654e-06,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15480957925319672,
|
|
"step": 3620,
|
|
"valid_targets_mean": 5253.4,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 5.865695792880259,
|
|
"grad_norm": 0.5379455694292067,
|
|
"learning_rate": 3.124346803116354e-06,
|
|
"loss": 0.2101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2440945953130722,
|
|
"step": 3625,
|
|
"valid_targets_mean": 5694.6,
|
|
"valid_targets_min": 503
|
|
},
|
|
{
|
|
"epoch": 5.87378640776699,
|
|
"grad_norm": 0.5382994407776523,
|
|
"learning_rate": 3.0811746158210165e-06,
|
|
"loss": 0.1834,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19144295156002045,
|
|
"step": 3630,
|
|
"valid_targets_mean": 5798.9,
|
|
"valid_targets_min": 1824
|
|
},
|
|
{
|
|
"epoch": 5.881877022653722,
|
|
"grad_norm": 0.6049388573466525,
|
|
"learning_rate": 3.0382778771232766e-06,
|
|
"loss": 0.1787,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1829536110162735,
|
|
"step": 3635,
|
|
"valid_targets_mean": 6017.6,
|
|
"valid_targets_min": 2360
|
|
},
|
|
{
|
|
"epoch": 5.889967637540453,
|
|
"grad_norm": 1.0647486504663568,
|
|
"learning_rate": 2.9956572854077205e-06,
|
|
"loss": 0.2025,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24133262038230896,
|
|
"step": 3640,
|
|
"valid_targets_mean": 7051.8,
|
|
"valid_targets_min": 3489
|
|
},
|
|
{
|
|
"epoch": 5.898058252427185,
|
|
"grad_norm": 0.5096369018693362,
|
|
"learning_rate": 2.9533135345630536e-06,
|
|
"loss": 0.2075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21242384612560272,
|
|
"step": 3645,
|
|
"valid_targets_mean": 6098.8,
|
|
"valid_targets_min": 1470
|
|
},
|
|
{
|
|
"epoch": 5.906148867313916,
|
|
"grad_norm": 0.5494897013669703,
|
|
"learning_rate": 2.911247313970882e-06,
|
|
"loss": 0.1728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16211332380771637,
|
|
"step": 3650,
|
|
"valid_targets_mean": 4782.9,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 5.914239482200648,
|
|
"grad_norm": 0.5116839125190226,
|
|
"learning_rate": 2.8694593084944356e-06,
|
|
"loss": 0.1879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16128423810005188,
|
|
"step": 3655,
|
|
"valid_targets_mean": 5752.4,
|
|
"valid_targets_min": 3100
|
|
},
|
|
{
|
|
"epoch": 5.922330097087379,
|
|
"grad_norm": 0.5133859926626013,
|
|
"learning_rate": 2.8279501984674396e-06,
|
|
"loss": 0.1548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12045817077159882,
|
|
"step": 3660,
|
|
"valid_targets_mean": 6254.7,
|
|
"valid_targets_min": 2156
|
|
},
|
|
{
|
|
"epoch": 5.93042071197411,
|
|
"grad_norm": 0.5696448579606227,
|
|
"learning_rate": 2.7867206596830355e-06,
|
|
"loss": 0.175,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20523615181446075,
|
|
"step": 3665,
|
|
"valid_targets_mean": 5049.7,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 5.938511326860842,
|
|
"grad_norm": 0.4904852898035377,
|
|
"learning_rate": 2.7457713633827763e-06,
|
|
"loss": 0.1856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18365246057510376,
|
|
"step": 3670,
|
|
"valid_targets_mean": 6636.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 5.946601941747573,
|
|
"grad_norm": 0.5282042145989304,
|
|
"learning_rate": 2.705102976245697e-06,
|
|
"loss": 0.1618,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16423557698726654,
|
|
"step": 3675,
|
|
"valid_targets_mean": 5273.4,
|
|
"valid_targets_min": 2580
|
|
},
|
|
{
|
|
"epoch": 5.9546925566343045,
|
|
"grad_norm": 0.5154717781682355,
|
|
"learning_rate": 2.6647161603774763e-06,
|
|
"loss": 0.1824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18065178394317627,
|
|
"step": 3680,
|
|
"valid_targets_mean": 5552.4,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 5.9627831715210355,
|
|
"grad_norm": 0.48478327032359064,
|
|
"learning_rate": 2.624611573299629e-06,
|
|
"loss": 0.1796,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23541313409805298,
|
|
"step": 3685,
|
|
"valid_targets_mean": 7252.6,
|
|
"valid_targets_min": 4266
|
|
},
|
|
{
|
|
"epoch": 5.970873786407767,
|
|
"grad_norm": 0.539425851785002,
|
|
"learning_rate": 2.5847898679388217e-06,
|
|
"loss": 0.1968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19013917446136475,
|
|
"step": 3690,
|
|
"valid_targets_mean": 5798.0,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 5.9789644012944985,
|
|
"grad_norm": 0.5429501981383015,
|
|
"learning_rate": 2.5452516926162394e-06,
|
|
"loss": 0.1838,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16128823161125183,
|
|
"step": 3695,
|
|
"valid_targets_mean": 6265.6,
|
|
"valid_targets_min": 2353
|
|
},
|
|
{
|
|
"epoch": 5.9870550161812295,
|
|
"grad_norm": 0.5651497028720758,
|
|
"learning_rate": 2.5059976910370255e-06,
|
|
"loss": 0.1851,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18095943331718445,
|
|
"step": 3700,
|
|
"valid_targets_mean": 5500.2,
|
|
"valid_targets_min": 2785
|
|
},
|
|
{
|
|
"epoch": 5.995145631067961,
|
|
"grad_norm": 0.5098750853748499,
|
|
"learning_rate": 2.467028502279802e-06,
|
|
"loss": 0.1873,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18532708287239075,
|
|
"step": 3705,
|
|
"valid_targets_mean": 6825.4,
|
|
"valid_targets_min": 2113
|
|
},
|
|
{
|
|
"epoch": 6.003236245954692,
|
|
"grad_norm": 0.6429110805047797,
|
|
"learning_rate": 2.428344760786283e-06,
|
|
"loss": 0.2118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3460083305835724,
|
|
"step": 3710,
|
|
"valid_targets_mean": 8109.9,
|
|
"valid_targets_min": 2721
|
|
},
|
|
{
|
|
"epoch": 6.011326860841424,
|
|
"grad_norm": 0.617993460237832,
|
|
"learning_rate": 2.389947096350913e-06,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27906131744384766,
|
|
"step": 3715,
|
|
"valid_targets_mean": 7440.1,
|
|
"valid_targets_min": 295
|
|
},
|
|
{
|
|
"epoch": 6.019417475728155,
|
|
"grad_norm": 0.5342153883227024,
|
|
"learning_rate": 2.3518361341106366e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2538403868675232,
|
|
"step": 3720,
|
|
"valid_targets_mean": 6666.0,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 6.027508090614886,
|
|
"grad_norm": 0.49092970456222856,
|
|
"learning_rate": 2.3140124945347188e-06,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2011633813381195,
|
|
"step": 3725,
|
|
"valid_targets_mean": 6712.9,
|
|
"valid_targets_min": 2430
|
|
},
|
|
{
|
|
"epoch": 6.035598705501618,
|
|
"grad_norm": 0.4772774830924541,
|
|
"learning_rate": 2.2764767934146304e-06,
|
|
"loss": 0.2691,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27531710267066956,
|
|
"step": 3730,
|
|
"valid_targets_mean": 7593.6,
|
|
"valid_targets_min": 2385
|
|
},
|
|
{
|
|
"epoch": 6.043689320388349,
|
|
"grad_norm": 0.4885723859576736,
|
|
"learning_rate": 2.2392296418540527e-06,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31475353240966797,
|
|
"step": 3735,
|
|
"valid_targets_mean": 8572.8,
|
|
"valid_targets_min": 3812
|
|
},
|
|
{
|
|
"epoch": 6.051779935275081,
|
|
"grad_norm": 0.46349461761945754,
|
|
"learning_rate": 2.20227164625888e-06,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23537136614322662,
|
|
"step": 3740,
|
|
"valid_targets_mean": 7005.8,
|
|
"valid_targets_min": 2432
|
|
},
|
|
{
|
|
"epoch": 6.059870550161812,
|
|
"grad_norm": 0.5143036769044641,
|
|
"learning_rate": 2.165603408327386e-06,
|
|
"loss": 0.2824,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2768769860267639,
|
|
"step": 3745,
|
|
"valid_targets_mean": 7800.5,
|
|
"valid_targets_min": 2317
|
|
},
|
|
{
|
|
"epoch": 6.067961165048544,
|
|
"grad_norm": 0.46034482764138784,
|
|
"learning_rate": 2.129225525040428e-06,
|
|
"loss": 0.244,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.25223392248153687,
|
|
"step": 3750,
|
|
"valid_targets_mean": 7407.8,
|
|
"valid_targets_min": 3408
|
|
},
|
|
{
|
|
"epoch": 6.076051779935275,
|
|
"grad_norm": 0.48621110957683666,
|
|
"learning_rate": 2.0931385886517043e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2531178593635559,
|
|
"step": 3755,
|
|
"valid_targets_mean": 7218.2,
|
|
"valid_targets_min": 3958
|
|
},
|
|
{
|
|
"epoch": 6.084142394822006,
|
|
"grad_norm": 0.5155976217240823,
|
|
"learning_rate": 2.05734318667812e-06,
|
|
"loss": 0.2634,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27418065071105957,
|
|
"step": 3760,
|
|
"valid_targets_mean": 6286.2,
|
|
"valid_targets_min": 2470
|
|
},
|
|
{
|
|
"epoch": 6.092233009708738,
|
|
"grad_norm": 0.48664238742645205,
|
|
"learning_rate": 2.0218399018902368e-06,
|
|
"loss": 0.2731,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3080136477947235,
|
|
"step": 3765,
|
|
"valid_targets_mean": 8067.4,
|
|
"valid_targets_min": 3465
|
|
},
|
|
{
|
|
"epoch": 6.100323624595469,
|
|
"grad_norm": 0.46352182330349134,
|
|
"learning_rate": 1.986629312302759e-06,
|
|
"loss": 0.2725,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24487361311912537,
|
|
"step": 3770,
|
|
"valid_targets_mean": 7811.1,
|
|
"valid_targets_min": 3446
|
|
},
|
|
{
|
|
"epoch": 6.108414239482201,
|
|
"grad_norm": 0.47902814582303754,
|
|
"learning_rate": 1.9517119911651594e-06,
|
|
"loss": 0.2611,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27173787355422974,
|
|
"step": 3775,
|
|
"valid_targets_mean": 7021.4,
|
|
"valid_targets_min": 3473
|
|
},
|
|
{
|
|
"epoch": 6.116504854368932,
|
|
"grad_norm": 0.48426838259458393,
|
|
"learning_rate": 1.917088506952307e-06,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21929030120372772,
|
|
"step": 3780,
|
|
"valid_targets_mean": 5997.2,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 6.124595469255663,
|
|
"grad_norm": 0.5379855412992037,
|
|
"learning_rate": 1.8827594233552338e-06,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2751188278198242,
|
|
"step": 3785,
|
|
"valid_targets_mean": 7148.6,
|
|
"valid_targets_min": 4702
|
|
},
|
|
{
|
|
"epoch": 6.132686084142395,
|
|
"grad_norm": 0.4577375186919723,
|
|
"learning_rate": 1.8487252992719562e-06,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24760842323303223,
|
|
"step": 3790,
|
|
"valid_targets_mean": 7563.4,
|
|
"valid_targets_min": 2494
|
|
},
|
|
{
|
|
"epoch": 6.140776699029126,
|
|
"grad_norm": 0.6351508094968208,
|
|
"learning_rate": 1.8149866887983747e-06,
|
|
"loss": 0.2526,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24771344661712646,
|
|
"step": 3795,
|
|
"valid_targets_mean": 7400.8,
|
|
"valid_targets_min": 2898
|
|
},
|
|
{
|
|
"epoch": 6.148867313915858,
|
|
"grad_norm": 0.4709978586642668,
|
|
"learning_rate": 1.7815441412192447e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23811772465705872,
|
|
"step": 3800,
|
|
"valid_targets_mean": 6554.0,
|
|
"valid_targets_min": 2859
|
|
},
|
|
{
|
|
"epoch": 6.156957928802589,
|
|
"grad_norm": 0.48529604703966406,
|
|
"learning_rate": 1.7483982009992506e-06,
|
|
"loss": 0.2742,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2738912105560303,
|
|
"step": 3805,
|
|
"valid_targets_mean": 8087.6,
|
|
"valid_targets_min": 3121
|
|
},
|
|
{
|
|
"epoch": 6.165048543689321,
|
|
"grad_norm": 0.5094951979756573,
|
|
"learning_rate": 1.715549407774124e-06,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26680633425712585,
|
|
"step": 3810,
|
|
"valid_targets_mean": 7483.6,
|
|
"valid_targets_min": 2526
|
|
},
|
|
{
|
|
"epoch": 6.173139158576052,
|
|
"grad_norm": 0.49094410985136205,
|
|
"learning_rate": 1.6829982963418667e-06,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2834646701812744,
|
|
"step": 3815,
|
|
"valid_targets_mean": 7937.6,
|
|
"valid_targets_min": 2332
|
|
},
|
|
{
|
|
"epoch": 6.181229773462783,
|
|
"grad_norm": 0.4943073784626869,
|
|
"learning_rate": 1.6507453966540454e-06,
|
|
"loss": 0.2681,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21011626720428467,
|
|
"step": 3820,
|
|
"valid_targets_mean": 5858.5,
|
|
"valid_targets_min": 1991
|
|
},
|
|
{
|
|
"epoch": 6.189320388349515,
|
|
"grad_norm": 0.5144149277940221,
|
|
"learning_rate": 1.6187912338071577e-06,
|
|
"loss": 0.2636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21853004395961761,
|
|
"step": 3825,
|
|
"valid_targets_mean": 6832.9,
|
|
"valid_targets_min": 2464
|
|
},
|
|
{
|
|
"epoch": 6.197411003236246,
|
|
"grad_norm": 0.4876684368823754,
|
|
"learning_rate": 1.5871363280340913e-06,
|
|
"loss": 0.2515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29543834924697876,
|
|
"step": 3830,
|
|
"valid_targets_mean": 7922.1,
|
|
"valid_targets_min": 3838
|
|
},
|
|
{
|
|
"epoch": 6.205501618122978,
|
|
"grad_norm": 0.8665595775858145,
|
|
"learning_rate": 1.555781194695649e-06,
|
|
"loss": 0.2853,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3264833092689514,
|
|
"step": 3835,
|
|
"valid_targets_mean": 3412.1,
|
|
"valid_targets_min": 1423
|
|
},
|
|
{
|
|
"epoch": 6.213592233009709,
|
|
"grad_norm": 0.892049887510929,
|
|
"learning_rate": 1.5247263442721494e-06,
|
|
"loss": 0.3463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.33291494846343994,
|
|
"step": 3840,
|
|
"valid_targets_mean": 3334.9,
|
|
"valid_targets_min": 1457
|
|
},
|
|
{
|
|
"epoch": 6.2216828478964405,
|
|
"grad_norm": 0.7890194853199695,
|
|
"learning_rate": 1.4939722823551428e-06,
|
|
"loss": 0.3381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2830268442630768,
|
|
"step": 3845,
|
|
"valid_targets_mean": 3261.9,
|
|
"valid_targets_min": 1130
|
|
},
|
|
{
|
|
"epoch": 6.229773462783172,
|
|
"grad_norm": 0.9008470932667201,
|
|
"learning_rate": 1.4635195096391463e-06,
|
|
"loss": 0.304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32268476486206055,
|
|
"step": 3850,
|
|
"valid_targets_mean": 3196.9,
|
|
"valid_targets_min": 1352
|
|
},
|
|
{
|
|
"epoch": 6.237864077669903,
|
|
"grad_norm": 0.8472773074650307,
|
|
"learning_rate": 1.4333685219135163e-06,
|
|
"loss": 0.3449,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3363885283470154,
|
|
"step": 3855,
|
|
"valid_targets_mean": 3218.4,
|
|
"valid_targets_min": 1283
|
|
},
|
|
{
|
|
"epoch": 6.2459546925566345,
|
|
"grad_norm": 0.8552973048647464,
|
|
"learning_rate": 1.403519810054379e-06,
|
|
"loss": 0.3296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3232272267341614,
|
|
"step": 3860,
|
|
"valid_targets_mean": 3128.2,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 6.2540453074433655,
|
|
"grad_norm": 0.7934267015934255,
|
|
"learning_rate": 1.373973860016602e-06,
|
|
"loss": 0.3281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3452760577201843,
|
|
"step": 3865,
|
|
"valid_targets_mean": 3595.7,
|
|
"valid_targets_min": 1167
|
|
},
|
|
{
|
|
"epoch": 6.262135922330097,
|
|
"grad_norm": 0.9335558226534135,
|
|
"learning_rate": 1.3447311528259354e-06,
|
|
"loss": 0.3284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3375539779663086,
|
|
"step": 3870,
|
|
"valid_targets_mean": 2384.2,
|
|
"valid_targets_min": 1050
|
|
},
|
|
{
|
|
"epoch": 6.270226537216828,
|
|
"grad_norm": 1.020746618496496,
|
|
"learning_rate": 1.3157921645711436e-06,
|
|
"loss": 0.3124,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059931993484497,
|
|
"step": 3875,
|
|
"valid_targets_mean": 2044.2,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 6.2783171521035595,
|
|
"grad_norm": 0.9350202814104195,
|
|
"learning_rate": 1.2871573663962611e-06,
|
|
"loss": 0.315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34296131134033203,
|
|
"step": 3880,
|
|
"valid_targets_mean": 2718.1,
|
|
"valid_targets_min": 1454
|
|
},
|
|
{
|
|
"epoch": 6.286407766990291,
|
|
"grad_norm": 0.8146668135738933,
|
|
"learning_rate": 1.2588272244929401e-06,
|
|
"loss": 0.3302,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30769336223602295,
|
|
"step": 3885,
|
|
"valid_targets_mean": 3258.4,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 6.294498381877022,
|
|
"grad_norm": 0.8273218896381229,
|
|
"learning_rate": 1.2308022000928287e-06,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3195771872997284,
|
|
"step": 3890,
|
|
"valid_targets_mean": 3150.2,
|
|
"valid_targets_min": 1604
|
|
},
|
|
{
|
|
"epoch": 6.302588996763754,
|
|
"grad_norm": 0.8419774148102896,
|
|
"learning_rate": 1.203082749460085e-06,
|
|
"loss": 0.3106,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2897275388240814,
|
|
"step": 3895,
|
|
"valid_targets_mean": 2876.9,
|
|
"valid_targets_min": 1155
|
|
},
|
|
{
|
|
"epoch": 6.310679611650485,
|
|
"grad_norm": 1.050868420114631,
|
|
"learning_rate": 1.1756693238839566e-06,
|
|
"loss": 0.3047,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29404884576797485,
|
|
"step": 3900,
|
|
"valid_targets_mean": 4079.6,
|
|
"valid_targets_min": 1052
|
|
},
|
|
{
|
|
"epoch": 6.318770226537217,
|
|
"grad_norm": 0.801631926562582,
|
|
"learning_rate": 1.1485623696714043e-06,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28582143783569336,
|
|
"step": 3905,
|
|
"valid_targets_mean": 3415.6,
|
|
"valid_targets_min": 1602
|
|
},
|
|
{
|
|
"epoch": 6.326860841423948,
|
|
"grad_norm": 0.7993792702234223,
|
|
"learning_rate": 1.1217623281398571e-06,
|
|
"loss": 0.31,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2818544805049896,
|
|
"step": 3910,
|
|
"valid_targets_mean": 3181.6,
|
|
"valid_targets_min": 1316
|
|
},
|
|
{
|
|
"epoch": 6.334951456310679,
|
|
"grad_norm": 0.8511495939913303,
|
|
"learning_rate": 1.0952696356100234e-06,
|
|
"loss": 0.3094,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3155943751335144,
|
|
"step": 3915,
|
|
"valid_targets_mean": 3121.9,
|
|
"valid_targets_min": 1419
|
|
},
|
|
{
|
|
"epoch": 6.343042071197411,
|
|
"grad_norm": 0.7964448416131412,
|
|
"learning_rate": 1.069084723398781e-06,
|
|
"loss": 0.3071,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3194991946220398,
|
|
"step": 3920,
|
|
"valid_targets_mean": 3586.2,
|
|
"valid_targets_min": 1317
|
|
},
|
|
{
|
|
"epoch": 6.351132686084142,
|
|
"grad_norm": 1.895989148097138,
|
|
"learning_rate": 1.0432080178121695e-06,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2887115478515625,
|
|
"step": 3925,
|
|
"valid_targets_mean": 2831.5,
|
|
"valid_targets_min": 970
|
|
},
|
|
{
|
|
"epoch": 6.359223300970874,
|
|
"grad_norm": 0.8254121060367813,
|
|
"learning_rate": 1.0176399401384306e-06,
|
|
"loss": 0.3191,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30215805768966675,
|
|
"step": 3930,
|
|
"valid_targets_mean": 3350.7,
|
|
"valid_targets_min": 1482
|
|
},
|
|
{
|
|
"epoch": 6.367313915857605,
|
|
"grad_norm": 0.8600251391796992,
|
|
"learning_rate": 9.92380906641166e-07,
|
|
"loss": 0.3095,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2902137339115143,
|
|
"step": 3935,
|
|
"valid_targets_mean": 3731.3,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 6.375404530744337,
|
|
"grad_norm": 0.8998136566486831,
|
|
"learning_rate": 9.674313285525484e-07,
|
|
"loss": 0.3088,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3461061716079712,
|
|
"step": 3940,
|
|
"valid_targets_mean": 2778.6,
|
|
"valid_targets_min": 1161
|
|
},
|
|
{
|
|
"epoch": 6.383495145631068,
|
|
"grad_norm": 0.9099848810975844,
|
|
"learning_rate": 9.427916120666314e-07,
|
|
"loss": 0.3182,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32288292050361633,
|
|
"step": 3945,
|
|
"valid_targets_mean": 2516.2,
|
|
"valid_targets_min": 1147
|
|
},
|
|
{
|
|
"epoch": 6.391585760517799,
|
|
"grad_norm": 0.9030343212723014,
|
|
"learning_rate": 9.18462158332738e-07,
|
|
"loss": 0.3118,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29295065999031067,
|
|
"step": 3950,
|
|
"valid_targets_mean": 2672.7,
|
|
"valid_targets_min": 1160
|
|
},
|
|
{
|
|
"epoch": 6.399676375404531,
|
|
"grad_norm": 1.0808970579643715,
|
|
"learning_rate": 8.944433634489335e-07,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31745681166648865,
|
|
"step": 3955,
|
|
"valid_targets_mean": 3144.7,
|
|
"valid_targets_min": 1229
|
|
},
|
|
{
|
|
"epoch": 6.407766990291262,
|
|
"grad_norm": 0.8902905271705912,
|
|
"learning_rate": 8.707356184555626e-07,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.307051420211792,
|
|
"step": 3960,
|
|
"valid_targets_mean": 2757.4,
|
|
"valid_targets_min": 1187
|
|
},
|
|
{
|
|
"epoch": 6.415857605177994,
|
|
"grad_norm": 0.8492882495541785,
|
|
"learning_rate": 8.473393093288962e-07,
|
|
"loss": 0.3102,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30651533603668213,
|
|
"step": 3965,
|
|
"valid_targets_mean": 3186.5,
|
|
"valid_targets_min": 1140
|
|
},
|
|
{
|
|
"epoch": 6.423948220064725,
|
|
"grad_norm": 0.8938811801174615,
|
|
"learning_rate": 8.242548169748388e-07,
|
|
"loss": 0.3147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30959033966064453,
|
|
"step": 3970,
|
|
"valid_targets_mean": 2945.1,
|
|
"valid_targets_min": 1342
|
|
},
|
|
{
|
|
"epoch": 6.432038834951456,
|
|
"grad_norm": 0.8851063163539813,
|
|
"learning_rate": 8.014825172227359e-07,
|
|
"loss": 0.3006,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2955814003944397,
|
|
"step": 3975,
|
|
"valid_targets_mean": 2664.8,
|
|
"valid_targets_min": 1158
|
|
},
|
|
{
|
|
"epoch": 6.440129449838188,
|
|
"grad_norm": 0.8471775153920295,
|
|
"learning_rate": 7.790227808192497e-07,
|
|
"loss": 0.3096,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.32479000091552734,
|
|
"step": 3980,
|
|
"valid_targets_mean": 3277.8,
|
|
"valid_targets_min": 905
|
|
},
|
|
{
|
|
"epoch": 6.448220064724919,
|
|
"grad_norm": 0.7712971413979559,
|
|
"learning_rate": 7.568759734223263e-07,
|
|
"loss": 0.2849,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26132726669311523,
|
|
"step": 3985,
|
|
"valid_targets_mean": 3435.2,
|
|
"valid_targets_min": 1523
|
|
},
|
|
{
|
|
"epoch": 6.456310679611651,
|
|
"grad_norm": 0.7960120633487386,
|
|
"learning_rate": 7.350424555952318e-07,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2780383825302124,
|
|
"step": 3990,
|
|
"valid_targets_mean": 4198.9,
|
|
"valid_targets_min": 916
|
|
},
|
|
{
|
|
"epoch": 6.464401294498382,
|
|
"grad_norm": 0.7940656209613582,
|
|
"learning_rate": 7.135225828007009e-07,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3192026615142822,
|
|
"step": 3995,
|
|
"valid_targets_mean": 4307.2,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 6.472491909385114,
|
|
"grad_norm": 0.8659078167481403,
|
|
"learning_rate": 6.92316705395133e-07,
|
|
"loss": 0.2949,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2758025527000427,
|
|
"step": 4000,
|
|
"valid_targets_mean": 4283.3,
|
|
"valid_targets_min": 965
|
|
},
|
|
{
|
|
"epoch": 6.480582524271845,
|
|
"grad_norm": 0.7520716674040167,
|
|
"learning_rate": 6.714251686228968e-07,
|
|
"loss": 0.3044,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3214672803878784,
|
|
"step": 4005,
|
|
"valid_targets_mean": 4556.1,
|
|
"valid_targets_min": 1616
|
|
},
|
|
{
|
|
"epoch": 6.488673139158576,
|
|
"grad_norm": 0.8133648933005242,
|
|
"learning_rate": 6.508483126107146e-07,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.255154550075531,
|
|
"step": 4010,
|
|
"valid_targets_mean": 3447.2,
|
|
"valid_targets_min": 1306
|
|
},
|
|
{
|
|
"epoch": 6.496763754045308,
|
|
"grad_norm": 0.7949247419294541,
|
|
"learning_rate": 6.305864723621025e-07,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3274669647216797,
|
|
"step": 4015,
|
|
"valid_targets_mean": 4328.6,
|
|
"valid_targets_min": 1066
|
|
},
|
|
{
|
|
"epoch": 6.504854368932039,
|
|
"grad_norm": 0.7110098139561425,
|
|
"learning_rate": 6.10639977751939e-07,
|
|
"loss": 0.3131,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28492259979248047,
|
|
"step": 4020,
|
|
"valid_targets_mean": 3835.6,
|
|
"valid_targets_min": 265
|
|
},
|
|
{
|
|
"epoch": 6.5129449838187705,
|
|
"grad_norm": 0.7126645911750137,
|
|
"learning_rate": 5.91009153521096e-07,
|
|
"loss": 0.2889,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24657857418060303,
|
|
"step": 4025,
|
|
"valid_targets_mean": 3986.9,
|
|
"valid_targets_min": 1113
|
|
},
|
|
{
|
|
"epoch": 6.5210355987055015,
|
|
"grad_norm": 0.7110478416139949,
|
|
"learning_rate": 5.716943192711277e-07,
|
|
"loss": 0.2818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.26910921931266785,
|
|
"step": 4030,
|
|
"valid_targets_mean": 4109.8,
|
|
"valid_targets_min": 1541
|
|
},
|
|
{
|
|
"epoch": 6.529126213592233,
|
|
"grad_norm": 0.7449519077071346,
|
|
"learning_rate": 5.526957894590923e-07,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3063822388648987,
|
|
"step": 4035,
|
|
"valid_targets_mean": 4168.6,
|
|
"valid_targets_min": 2291
|
|
},
|
|
{
|
|
"epoch": 6.5372168284789645,
|
|
"grad_norm": 0.7021399739407856,
|
|
"learning_rate": 5.340138733924161e-07,
|
|
"loss": 0.3026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2863706052303314,
|
|
"step": 4040,
|
|
"valid_targets_mean": 4216.5,
|
|
"valid_targets_min": 1752
|
|
},
|
|
{
|
|
"epoch": 6.5453074433656955,
|
|
"grad_norm": 0.7735052174852324,
|
|
"learning_rate": 5.156488752238708e-07,
|
|
"loss": 0.2843,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.29372304677963257,
|
|
"step": 4045,
|
|
"valid_targets_mean": 3781.9,
|
|
"valid_targets_min": 1305
|
|
},
|
|
{
|
|
"epoch": 6.553398058252427,
|
|
"grad_norm": 0.763189708006023,
|
|
"learning_rate": 4.976010939466136e-07,
|
|
"loss": 0.2811,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.265279620885849,
|
|
"step": 4050,
|
|
"valid_targets_mean": 3348.1,
|
|
"valid_targets_min": 1135
|
|
},
|
|
{
|
|
"epoch": 6.561488673139158,
|
|
"grad_norm": 0.6971921406699557,
|
|
"learning_rate": 4.798708233893168e-07,
|
|
"loss": 0.2893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3288305401802063,
|
|
"step": 4055,
|
|
"valid_targets_mean": 4481.9,
|
|
"valid_targets_min": 1322
|
|
},
|
|
{
|
|
"epoch": 6.56957928802589,
|
|
"grad_norm": 0.7843790286009376,
|
|
"learning_rate": 4.624583522113879e-07,
|
|
"loss": 0.2906,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31674474477767944,
|
|
"step": 4060,
|
|
"valid_targets_mean": 3422.4,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 6.577669902912621,
|
|
"grad_norm": 0.6898905734307865,
|
|
"learning_rate": 4.4536396389827986e-07,
|
|
"loss": 0.2808,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2700502872467041,
|
|
"step": 4065,
|
|
"valid_targets_mean": 3936.9,
|
|
"valid_targets_min": 991
|
|
},
|
|
{
|
|
"epoch": 6.585760517799352,
|
|
"grad_norm": 0.6732046516074872,
|
|
"learning_rate": 4.285879367568546e-07,
|
|
"loss": 0.2755,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2750830054283142,
|
|
"step": 4070,
|
|
"valid_targets_mean": 3986.6,
|
|
"valid_targets_min": 1735
|
|
},
|
|
{
|
|
"epoch": 6.593851132686084,
|
|
"grad_norm": 0.7679798193130004,
|
|
"learning_rate": 4.1213054391086914e-07,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28283199667930603,
|
|
"step": 4075,
|
|
"valid_targets_mean": 3311.6,
|
|
"valid_targets_min": 1092
|
|
},
|
|
{
|
|
"epoch": 6.601941747572815,
|
|
"grad_norm": 0.6026507323893453,
|
|
"learning_rate": 3.959920532965278e-07,
|
|
"loss": 0.3089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2601849436759949,
|
|
"step": 4080,
|
|
"valid_targets_mean": 4680.1,
|
|
"valid_targets_min": 1798
|
|
},
|
|
{
|
|
"epoch": 6.610032362459547,
|
|
"grad_norm": 0.7227252220712441,
|
|
"learning_rate": 3.8017272765810795e-07,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30186226963996887,
|
|
"step": 4085,
|
|
"valid_targets_mean": 4138.6,
|
|
"valid_targets_min": 1094
|
|
},
|
|
{
|
|
"epoch": 6.618122977346278,
|
|
"grad_norm": 0.6797239186310708,
|
|
"learning_rate": 3.646728245436926e-07,
|
|
"loss": 0.2947,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2772945761680603,
|
|
"step": 4090,
|
|
"valid_targets_mean": 3797.0,
|
|
"valid_targets_min": 1324
|
|
},
|
|
{
|
|
"epoch": 6.62621359223301,
|
|
"grad_norm": 0.6566219650337652,
|
|
"learning_rate": 3.4949259630097985e-07,
|
|
"loss": 0.295,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31445634365081787,
|
|
"step": 4095,
|
|
"valid_targets_mean": 4714.8,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 6.634304207119741,
|
|
"grad_norm": 0.7855187195704445,
|
|
"learning_rate": 3.346322900731602e-07,
|
|
"loss": 0.2921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3059901297092438,
|
|
"step": 4100,
|
|
"valid_targets_mean": 3223.0,
|
|
"valid_targets_min": 1392
|
|
},
|
|
{
|
|
"epoch": 6.642394822006472,
|
|
"grad_norm": 0.687926997782234,
|
|
"learning_rate": 3.2009214779491703e-07,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2721381187438965,
|
|
"step": 4105,
|
|
"valid_targets_mean": 3943.1,
|
|
"valid_targets_min": 1237
|
|
},
|
|
{
|
|
"epoch": 6.650485436893204,
|
|
"grad_norm": 0.6262388534931846,
|
|
"learning_rate": 3.0587240618845437e-07,
|
|
"loss": 0.2595,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2525428533554077,
|
|
"step": 4110,
|
|
"valid_targets_mean": 4954.7,
|
|
"valid_targets_min": 706
|
|
},
|
|
{
|
|
"epoch": 6.658576051779935,
|
|
"grad_norm": 0.7958100221453792,
|
|
"learning_rate": 2.9197329675967556e-07,
|
|
"loss": 0.266,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23203250765800476,
|
|
"step": 4115,
|
|
"valid_targets_mean": 3676.8,
|
|
"valid_targets_min": 1093
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.6670568718226326,
|
|
"learning_rate": 2.7839504579439734e-07,
|
|
"loss": 0.277,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2591486871242523,
|
|
"step": 4120,
|
|
"valid_targets_mean": 4621.4,
|
|
"valid_targets_min": 1738
|
|
},
|
|
{
|
|
"epoch": 6.674757281553398,
|
|
"grad_norm": 0.6841964897124194,
|
|
"learning_rate": 2.651378743546662e-07,
|
|
"loss": 0.2584,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23089730739593506,
|
|
"step": 4125,
|
|
"valid_targets_mean": 3523.9,
|
|
"valid_targets_min": 1856
|
|
},
|
|
{
|
|
"epoch": 6.68284789644013,
|
|
"grad_norm": 0.7736922565372187,
|
|
"learning_rate": 2.5220199827516335e-07,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2762467861175537,
|
|
"step": 4130,
|
|
"valid_targets_mean": 3767.3,
|
|
"valid_targets_min": 1039
|
|
},
|
|
{
|
|
"epoch": 6.690938511326861,
|
|
"grad_norm": 0.6878220159499924,
|
|
"learning_rate": 2.395876281596898e-07,
|
|
"loss": 0.2884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.286093533039093,
|
|
"step": 4135,
|
|
"valid_targets_mean": 4240.9,
|
|
"valid_targets_min": 995
|
|
},
|
|
{
|
|
"epoch": 6.699029126213592,
|
|
"grad_norm": 0.6785912150782965,
|
|
"learning_rate": 2.2729496937773375e-07,
|
|
"loss": 0.2825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3174231946468353,
|
|
"step": 4140,
|
|
"valid_targets_mean": 5330.5,
|
|
"valid_targets_min": 1212
|
|
},
|
|
{
|
|
"epoch": 6.707119741100324,
|
|
"grad_norm": 0.6514244014519243,
|
|
"learning_rate": 2.1532422206113957e-07,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21629130840301514,
|
|
"step": 4145,
|
|
"valid_targets_mean": 3586.9,
|
|
"valid_targets_min": 1172
|
|
},
|
|
{
|
|
"epoch": 6.715210355987055,
|
|
"grad_norm": 0.7503383439275698,
|
|
"learning_rate": 2.036755811008284e-07,
|
|
"loss": 0.2603,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.261480450630188,
|
|
"step": 4150,
|
|
"valid_targets_mean": 3200.2,
|
|
"valid_targets_min": 1561
|
|
},
|
|
{
|
|
"epoch": 6.723300970873787,
|
|
"grad_norm": 0.7498083689815956,
|
|
"learning_rate": 1.9234923614364298e-07,
|
|
"loss": 0.2688,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.31724271178245544,
|
|
"step": 4155,
|
|
"valid_targets_mean": 3854.4,
|
|
"valid_targets_min": 1217
|
|
},
|
|
{
|
|
"epoch": 6.731391585760518,
|
|
"grad_norm": 0.8416117264342016,
|
|
"learning_rate": 1.813453715892588e-07,
|
|
"loss": 0.2766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.30290308594703674,
|
|
"step": 4160,
|
|
"valid_targets_mean": 2972.9,
|
|
"valid_targets_min": 333
|
|
},
|
|
{
|
|
"epoch": 6.739482200647249,
|
|
"grad_norm": 0.7415989066146672,
|
|
"learning_rate": 1.706641665871689e-07,
|
|
"loss": 0.2747,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.292361855506897,
|
|
"step": 4165,
|
|
"valid_targets_mean": 3447.4,
|
|
"valid_targets_min": 1280
|
|
},
|
|
{
|
|
"epoch": 6.747572815533981,
|
|
"grad_norm": 0.7261344570096327,
|
|
"learning_rate": 1.603057950337794e-07,
|
|
"loss": 0.2685,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2824702262878418,
|
|
"step": 4170,
|
|
"valid_targets_mean": 3488.9,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 6.755663430420712,
|
|
"grad_norm": 0.5981854317735275,
|
|
"learning_rate": 1.5027042556958083e-07,
|
|
"loss": 0.195,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16710269451141357,
|
|
"step": 4175,
|
|
"valid_targets_mean": 6022.9,
|
|
"valid_targets_min": 2384
|
|
},
|
|
{
|
|
"epoch": 6.763754045307444,
|
|
"grad_norm": 0.6204828785463561,
|
|
"learning_rate": 1.4055822157638566e-07,
|
|
"loss": 0.1855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17309242486953735,
|
|
"step": 4180,
|
|
"valid_targets_mean": 6773.4,
|
|
"valid_targets_min": 1712
|
|
},
|
|
{
|
|
"epoch": 6.771844660194175,
|
|
"grad_norm": 0.6175696916898002,
|
|
"learning_rate": 1.3116934117468617e-07,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1712668240070343,
|
|
"step": 4185,
|
|
"valid_targets_mean": 5853.4,
|
|
"valid_targets_min": 2336
|
|
},
|
|
{
|
|
"epoch": 6.779935275080906,
|
|
"grad_norm": 0.6110178559303455,
|
|
"learning_rate": 1.2210393722106973e-07,
|
|
"loss": 0.1748,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16805970668792725,
|
|
"step": 4190,
|
|
"valid_targets_mean": 5625.0,
|
|
"valid_targets_min": 341
|
|
},
|
|
{
|
|
"epoch": 6.788025889967638,
|
|
"grad_norm": 0.5850597980478186,
|
|
"learning_rate": 1.1336215730573863e-07,
|
|
"loss": 0.1884,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.141169935464859,
|
|
"step": 4195,
|
|
"valid_targets_mean": 4999.9,
|
|
"valid_targets_min": 546
|
|
},
|
|
{
|
|
"epoch": 6.796116504854369,
|
|
"grad_norm": 0.5304772632067583,
|
|
"learning_rate": 1.0494414375009642e-07,
|
|
"loss": 0.1677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16190440952777863,
|
|
"step": 4200,
|
|
"valid_targets_mean": 5886.6,
|
|
"valid_targets_min": 362
|
|
},
|
|
{
|
|
"epoch": 6.8042071197411005,
|
|
"grad_norm": 0.7157488847124063,
|
|
"learning_rate": 9.68500336044409e-08,
|
|
"loss": 0.1746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16354969143867493,
|
|
"step": 4205,
|
|
"valid_targets_mean": 5545.2,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 6.8122977346278315,
|
|
"grad_norm": 0.6188536501701989,
|
|
"learning_rate": 8.907995864572583e-08,
|
|
"loss": 0.1707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1599283516407013,
|
|
"step": 4210,
|
|
"valid_targets_mean": 5221.4,
|
|
"valid_targets_min": 685
|
|
},
|
|
{
|
|
"epoch": 6.820388349514563,
|
|
"grad_norm": 0.6051646108971138,
|
|
"learning_rate": 8.16340453754183e-08,
|
|
"loss": 0.1865,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15831029415130615,
|
|
"step": 4215,
|
|
"valid_targets_mean": 6906.4,
|
|
"valid_targets_min": 3688
|
|
},
|
|
{
|
|
"epoch": 6.828478964401294,
|
|
"grad_norm": 0.6843142673349079,
|
|
"learning_rate": 7.451241501744255e-08,
|
|
"loss": 0.1734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2084500789642334,
|
|
"step": 4220,
|
|
"valid_targets_mean": 5886.7,
|
|
"valid_targets_min": 3248
|
|
},
|
|
{
|
|
"epoch": 6.836569579288026,
|
|
"grad_norm": 0.8169465061204166,
|
|
"learning_rate": 6.771518351619932e-08,
|
|
"loss": 0.165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1563340127468109,
|
|
"step": 4225,
|
|
"valid_targets_mean": 5565.2,
|
|
"valid_targets_min": 849
|
|
},
|
|
{
|
|
"epoch": 6.844660194174757,
|
|
"grad_norm": 0.6240263655362814,
|
|
"learning_rate": 6.124246153468516e-08,
|
|
"loss": 0.1757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18898531794548035,
|
|
"step": 4230,
|
|
"valid_targets_mean": 4495.2,
|
|
"valid_targets_min": 1104
|
|
},
|
|
{
|
|
"epoch": 6.852750809061488,
|
|
"grad_norm": 0.6165610195908027,
|
|
"learning_rate": 5.5094354452684964e-08,
|
|
"loss": 0.1912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17224600911140442,
|
|
"step": 4235,
|
|
"valid_targets_mean": 5532.5,
|
|
"valid_targets_min": 3084
|
|
},
|
|
{
|
|
"epoch": 6.86084142394822,
|
|
"grad_norm": 0.5752785701802093,
|
|
"learning_rate": 4.927096236505779e-08,
|
|
"loss": 0.1809,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22186093032360077,
|
|
"step": 4240,
|
|
"valid_targets_mean": 6345.2,
|
|
"valid_targets_min": 2085
|
|
},
|
|
{
|
|
"epoch": 6.868932038834951,
|
|
"grad_norm": 0.6268844620559095,
|
|
"learning_rate": 4.3772380080111534e-08,
|
|
"loss": 0.2009,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1646140217781067,
|
|
"step": 4245,
|
|
"valid_targets_mean": 6782.8,
|
|
"valid_targets_min": 4896
|
|
},
|
|
{
|
|
"epoch": 6.877022653721683,
|
|
"grad_norm": 0.546664853489719,
|
|
"learning_rate": 3.85986971180552e-08,
|
|
"loss": 0.1696,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12946677207946777,
|
|
"step": 4250,
|
|
"valid_targets_mean": 4796.1,
|
|
"valid_targets_min": 1174
|
|
},
|
|
{
|
|
"epoch": 6.885113268608414,
|
|
"grad_norm": 0.6505927779685415,
|
|
"learning_rate": 3.374999770954013e-08,
|
|
"loss": 0.1907,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1995866596698761,
|
|
"step": 4255,
|
|
"valid_targets_mean": 6176.4,
|
|
"valid_targets_min": 2462
|
|
},
|
|
{
|
|
"epoch": 6.893203883495145,
|
|
"grad_norm": 0.6100138602783509,
|
|
"learning_rate": 2.9226360794296638e-08,
|
|
"loss": 0.2156,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28793472051620483,
|
|
"step": 4260,
|
|
"valid_targets_mean": 5792.8,
|
|
"valid_targets_min": 3198
|
|
},
|
|
{
|
|
"epoch": 6.901294498381877,
|
|
"grad_norm": 0.5130289551978165,
|
|
"learning_rate": 2.502786001983726e-08,
|
|
"loss": 0.1784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15492425858974457,
|
|
"step": 4265,
|
|
"valid_targets_mean": 6734.0,
|
|
"valid_targets_min": 3363
|
|
},
|
|
{
|
|
"epoch": 6.909385113268608,
|
|
"grad_norm": 0.5083619319200668,
|
|
"learning_rate": 2.1154563740266588e-08,
|
|
"loss": 0.173,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1484774798154831,
|
|
"step": 4270,
|
|
"valid_targets_mean": 6156.2,
|
|
"valid_targets_min": 3554
|
|
},
|
|
{
|
|
"epoch": 6.91747572815534,
|
|
"grad_norm": 0.5437899387117957,
|
|
"learning_rate": 1.7606535015164405e-08,
|
|
"loss": 0.1874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19615788757801056,
|
|
"step": 4275,
|
|
"valid_targets_mean": 6275.4,
|
|
"valid_targets_min": 521
|
|
},
|
|
{
|
|
"epoch": 6.925566343042071,
|
|
"grad_norm": 0.535514833773013,
|
|
"learning_rate": 1.4383831608562048e-08,
|
|
"loss": 0.1581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18205270171165466,
|
|
"step": 4280,
|
|
"valid_targets_mean": 6298.0,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 6.933656957928802,
|
|
"grad_norm": 0.5176082517371361,
|
|
"learning_rate": 1.14865059879965e-08,
|
|
"loss": 0.1864,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24456022679805756,
|
|
"step": 4285,
|
|
"valid_targets_mean": 6639.6,
|
|
"valid_targets_min": 2502
|
|
},
|
|
{
|
|
"epoch": 6.941747572815534,
|
|
"grad_norm": 0.5795990513548355,
|
|
"learning_rate": 8.914605323664394e-09,
|
|
"loss": 0.1704,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16445422172546387,
|
|
"step": 4290,
|
|
"valid_targets_mean": 5588.2,
|
|
"valid_targets_min": 3131
|
|
},
|
|
{
|
|
"epoch": 6.949838187702265,
|
|
"grad_norm": 0.6200044961722991,
|
|
"learning_rate": 6.66817148764487e-09,
|
|
"loss": 0.1564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16037827730178833,
|
|
"step": 4295,
|
|
"valid_targets_mean": 5546.2,
|
|
"valid_targets_min": 892
|
|
},
|
|
{
|
|
"epoch": 6.957928802588997,
|
|
"grad_norm": 0.6945388564726743,
|
|
"learning_rate": 4.7472410532245495e-09,
|
|
"loss": 0.1825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16633108258247375,
|
|
"step": 4300,
|
|
"valid_targets_mean": 5383.6,
|
|
"valid_targets_min": 632
|
|
},
|
|
{
|
|
"epoch": 6.966019417475728,
|
|
"grad_norm": 0.5779686340710244,
|
|
"learning_rate": 3.151845294302458e-09,
|
|
"loss": 0.1862,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22585266828536987,
|
|
"step": 4305,
|
|
"valid_targets_mean": 5866.8,
|
|
"valid_targets_min": 523
|
|
},
|
|
{
|
|
"epoch": 6.97411003236246,
|
|
"grad_norm": 0.5613541281121649,
|
|
"learning_rate": 1.882010184874883e-09,
|
|
"loss": 0.1936,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1740507185459137,
|
|
"step": 4310,
|
|
"valid_targets_mean": 5499.6,
|
|
"valid_targets_min": 1071
|
|
},
|
|
{
|
|
"epoch": 6.982200647249191,
|
|
"grad_norm": 0.5251874269236045,
|
|
"learning_rate": 9.377563986157078e-10,
|
|
"loss": 0.1831,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2157984972000122,
|
|
"step": 4315,
|
|
"valid_targets_mean": 6634.6,
|
|
"valid_targets_min": 910
|
|
},
|
|
{
|
|
"epoch": 6.990291262135923,
|
|
"grad_norm": 0.515253369974493,
|
|
"learning_rate": 3.1909930854112646e-10,
|
|
"loss": 0.1816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1872372180223465,
|
|
"step": 4320,
|
|
"valid_targets_mean": 5642.3,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 6.998381877022654,
|
|
"grad_norm": 0.5332063951683166,
|
|
"learning_rate": 2.6048986760951466e-11,
|
|
"loss": 0.177,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17377439141273499,
|
|
"step": 4325,
|
|
"valid_targets_mean": 5486.1,
|
|
"valid_targets_min": 628
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20184344053268433,
|
|
"step": 4326,
|
|
"total_flos": 1656860604825600.0,
|
|
"train_loss": 0.3245428150801321,
|
|
"train_runtime": 27498.7411,
|
|
"train_samples_per_second": 2.513,
|
|
"train_steps_per_second": 0.157,
|
|
"valid_targets_mean": 6528.7,
|
|
"valid_targets_min": 1969
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 4326,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 1500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1656860604825600.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|