Files
nemotron-terminal-dependenc…/trainer_state.json

1639 lines
45 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 728,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04823151125401929,
"grad_norm": 12.448248888949518,
"learning_rate": 2.191780821917808e-06,
"loss": 1.0168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3392058312892914,
"step": 5,
"valid_targets_mean": 7154.5,
"valid_targets_min": 1632
},
{
"epoch": 0.09646302250803858,
"grad_norm": 7.284065798181604,
"learning_rate": 4.931506849315069e-06,
"loss": 0.9698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2997957468032837,
"step": 10,
"valid_targets_mean": 6839.2,
"valid_targets_min": 2692
},
{
"epoch": 0.14469453376205788,
"grad_norm": 2.616202122732068,
"learning_rate": 7.671232876712329e-06,
"loss": 0.8828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28220462799072266,
"step": 15,
"valid_targets_mean": 6918.6,
"valid_targets_min": 2206
},
{
"epoch": 0.19292604501607716,
"grad_norm": 1.699025397345384,
"learning_rate": 1.0410958904109589e-05,
"loss": 0.8296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2794211804866791,
"step": 20,
"valid_targets_mean": 7609.6,
"valid_targets_min": 2545
},
{
"epoch": 0.24115755627009647,
"grad_norm": 1.290348718584449,
"learning_rate": 1.3150684931506849e-05,
"loss": 0.7939,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2636311650276184,
"step": 25,
"valid_targets_mean": 7392.3,
"valid_targets_min": 2631
},
{
"epoch": 0.28938906752411575,
"grad_norm": 0.8749707786912787,
"learning_rate": 1.589041095890411e-05,
"loss": 0.7572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24677854776382446,
"step": 30,
"valid_targets_mean": 7459.0,
"valid_targets_min": 2419
},
{
"epoch": 0.33762057877813506,
"grad_norm": 0.7052716822140082,
"learning_rate": 1.863013698630137e-05,
"loss": 0.7285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24943570792675018,
"step": 35,
"valid_targets_mean": 7053.0,
"valid_targets_min": 2482
},
{
"epoch": 0.3858520900321543,
"grad_norm": 0.5008032218431284,
"learning_rate": 2.1369863013698632e-05,
"loss": 0.6917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21115031838417053,
"step": 40,
"valid_targets_mean": 6766.8,
"valid_targets_min": 2431
},
{
"epoch": 0.4340836012861736,
"grad_norm": 0.43138890795771023,
"learning_rate": 2.410958904109589e-05,
"loss": 0.6598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1905011534690857,
"step": 45,
"valid_targets_mean": 6303.5,
"valid_targets_min": 2602
},
{
"epoch": 0.48231511254019294,
"grad_norm": 0.3849831894181003,
"learning_rate": 2.6849315068493153e-05,
"loss": 0.6235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19090837240219116,
"step": 50,
"valid_targets_mean": 6993.3,
"valid_targets_min": 2422
},
{
"epoch": 0.5305466237942122,
"grad_norm": 0.35114545580046064,
"learning_rate": 2.958904109589041e-05,
"loss": 0.6019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.207503080368042,
"step": 55,
"valid_targets_mean": 7105.0,
"valid_targets_min": 2354
},
{
"epoch": 0.5787781350482315,
"grad_norm": 0.3193959336368111,
"learning_rate": 3.2328767123287676e-05,
"loss": 0.5868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18717698752880096,
"step": 60,
"valid_targets_mean": 6514.3,
"valid_targets_min": 1749
},
{
"epoch": 0.6270096463022508,
"grad_norm": 0.2918586604555415,
"learning_rate": 3.506849315068493e-05,
"loss": 0.5745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19028222560882568,
"step": 65,
"valid_targets_mean": 7214.1,
"valid_targets_min": 2845
},
{
"epoch": 0.6752411575562701,
"grad_norm": 0.32462856610800417,
"learning_rate": 3.780821917808219e-05,
"loss": 0.5558,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1807335764169693,
"step": 70,
"valid_targets_mean": 6766.1,
"valid_targets_min": 1311
},
{
"epoch": 0.7234726688102894,
"grad_norm": 0.3108931341927116,
"learning_rate": 3.999976995313839e-05,
"loss": 0.5459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1809534728527069,
"step": 75,
"valid_targets_mean": 7225.8,
"valid_targets_min": 2735
},
{
"epoch": 0.7717041800643086,
"grad_norm": 0.30171119245949446,
"learning_rate": 3.999171886864457e-05,
"loss": 0.5318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1711938977241516,
"step": 80,
"valid_targets_mean": 6957.0,
"valid_targets_min": 2492
},
{
"epoch": 0.819935691318328,
"grad_norm": 0.3164513581760958,
"learning_rate": 3.997217073267859e-05,
"loss": 0.5179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16384953260421753,
"step": 85,
"valid_targets_mean": 6966.7,
"valid_targets_min": 2862
},
{
"epoch": 0.8681672025723473,
"grad_norm": 0.30738984615139586,
"learning_rate": 3.9941136787191535e-05,
"loss": 0.5178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18157875537872314,
"step": 90,
"valid_targets_mean": 7276.3,
"valid_targets_min": 2704
},
{
"epoch": 0.9163987138263665,
"grad_norm": 0.323295425342175,
"learning_rate": 3.989863487951665e-05,
"loss": 0.5085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17602333426475525,
"step": 95,
"valid_targets_mean": 8148.8,
"valid_targets_min": 4040
},
{
"epoch": 0.9646302250803859,
"grad_norm": 0.3421041448009979,
"learning_rate": 3.984468945210548e-05,
"loss": 0.5051,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15274131298065186,
"step": 100,
"valid_targets_mean": 6615.2,
"valid_targets_min": 1450
},
{
"epoch": 1.0096463022508038,
"grad_norm": 0.30441217320888314,
"learning_rate": 3.977933152847132e-05,
"loss": 0.4967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1587449163198471,
"step": 105,
"valid_targets_mean": 6899.7,
"valid_targets_min": 1658
},
{
"epoch": 1.0578778135048232,
"grad_norm": 0.3011845985321456,
"learning_rate": 3.9702598695347794e-05,
"loss": 0.4907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16292162239551544,
"step": 110,
"valid_targets_mean": 7234.9,
"valid_targets_min": 2692
},
{
"epoch": 1.1061093247588425,
"grad_norm": 0.3049687116327277,
"learning_rate": 3.961453508107314e-05,
"loss": 0.4852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14895367622375488,
"step": 115,
"valid_targets_mean": 6615.0,
"valid_targets_min": 2490
},
{
"epoch": 1.1543408360128617,
"grad_norm": 0.3769508662707374,
"learning_rate": 3.951519133021237e-05,
"loss": 0.4785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1452217400074005,
"step": 120,
"valid_targets_mean": 6637.8,
"valid_targets_min": 1997
},
{
"epoch": 1.202572347266881,
"grad_norm": 0.3181671579805256,
"learning_rate": 3.94046245744321e-05,
"loss": 0.4809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16087602078914642,
"step": 125,
"valid_targets_mean": 7021.9,
"valid_targets_min": 1534
},
{
"epoch": 1.2508038585209003,
"grad_norm": 0.3207028910807402,
"learning_rate": 3.928289839964459e-05,
"loss": 0.4764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1502930223941803,
"step": 130,
"valid_targets_mean": 7277.9,
"valid_targets_min": 1007
},
{
"epoch": 1.2990353697749195,
"grad_norm": 0.32515712076357406,
"learning_rate": 3.915008280944014e-05,
"loss": 0.4767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1590278148651123,
"step": 135,
"valid_targets_mean": 7355.5,
"valid_targets_min": 2678
},
{
"epoch": 1.347266881028939,
"grad_norm": 0.2998255755095527,
"learning_rate": 3.900625418482867e-05,
"loss": 0.4751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16431793570518494,
"step": 140,
"valid_targets_mean": 8011.2,
"valid_targets_min": 1749
},
{
"epoch": 1.3954983922829582,
"grad_norm": 0.3085173284169989,
"learning_rate": 3.885149524031366e-05,
"loss": 0.4704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14795272052288055,
"step": 145,
"valid_targets_mean": 6751.4,
"valid_targets_min": 2326
},
{
"epoch": 1.4437299035369775,
"grad_norm": 0.3401175885992734,
"learning_rate": 3.868589497632388e-05,
"loss": 0.4664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15111419558525085,
"step": 150,
"valid_targets_mean": 6837.7,
"valid_targets_min": 2630
},
{
"epoch": 1.4919614147909968,
"grad_norm": 0.3862018866759018,
"learning_rate": 3.850954862803001e-05,
"loss": 0.4595,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15462824702262878,
"step": 155,
"valid_targets_mean": 7058.6,
"valid_targets_min": 2845
},
{
"epoch": 1.540192926045016,
"grad_norm": 0.30996367649676787,
"learning_rate": 3.8322557610575826e-05,
"loss": 0.4695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1570480465888977,
"step": 160,
"valid_targets_mean": 7273.3,
"valid_targets_min": 2307
},
{
"epoch": 1.5884244372990355,
"grad_norm": 0.3412824337507056,
"learning_rate": 3.812502946075527e-05,
"loss": 0.4623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16225996613502502,
"step": 165,
"valid_targets_mean": 7718.8,
"valid_targets_min": 1085
},
{
"epoch": 1.6366559485530545,
"grad_norm": 0.3077515658368193,
"learning_rate": 3.791707777516904e-05,
"loss": 0.4611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14422276616096497,
"step": 170,
"valid_targets_mean": 6652.2,
"valid_targets_min": 2034
},
{
"epoch": 1.684887459807074,
"grad_norm": 0.33129820149764216,
"learning_rate": 3.769882214489626e-05,
"loss": 0.4593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15183745324611664,
"step": 175,
"valid_targets_mean": 6814.9,
"valid_targets_min": 1305
},
{
"epoch": 1.7331189710610932,
"grad_norm": 0.34548902207170956,
"learning_rate": 3.7470388086718745e-05,
"loss": 0.457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1502872258424759,
"step": 180,
"valid_targets_mean": 6735.3,
"valid_targets_min": 1814
},
{
"epoch": 1.7813504823151125,
"grad_norm": 0.3328552197812335,
"learning_rate": 3.7231906970937464e-05,
"loss": 0.4552,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15311968326568604,
"step": 185,
"valid_targets_mean": 7155.6,
"valid_targets_min": 3210
},
{
"epoch": 1.829581993569132,
"grad_norm": 0.31935109111420523,
"learning_rate": 3.6983515945822736e-05,
"loss": 0.4525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1549065113067627,
"step": 190,
"valid_targets_mean": 7431.0,
"valid_targets_min": 2888
},
{
"epoch": 1.877813504823151,
"grad_norm": 0.3326363124007988,
"learning_rate": 3.672535785874148e-05,
"loss": 0.4459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1505606472492218,
"step": 195,
"valid_targets_mean": 6710.3,
"valid_targets_min": 2333
},
{
"epoch": 1.9260450160771705,
"grad_norm": 0.28380996162159844,
"learning_rate": 3.64575811740071e-05,
"loss": 0.4481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1508420705795288,
"step": 200,
"valid_targets_mean": 6827.8,
"valid_targets_min": 2018
},
{
"epoch": 1.9742765273311897,
"grad_norm": 0.30735752308501874,
"learning_rate": 3.6180339887498953e-05,
"loss": 0.4473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1658451110124588,
"step": 205,
"valid_targets_mean": 7271.1,
"valid_targets_min": 1928
},
{
"epoch": 2.0192926045016075,
"grad_norm": 0.37189914099274535,
"learning_rate": 3.589379343810083e-05,
"loss": 0.445,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1301526576280594,
"step": 210,
"valid_targets_mean": 6578.2,
"valid_targets_min": 1877
},
{
"epoch": 2.067524115755627,
"grad_norm": 0.3977228836472767,
"learning_rate": 3.559810661600907e-05,
"loss": 0.4391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14161312580108643,
"step": 215,
"valid_targets_mean": 6960.3,
"valid_targets_min": 1300
},
{
"epoch": 2.1157556270096465,
"grad_norm": 0.37183924478533426,
"learning_rate": 3.529344946796333e-05,
"loss": 0.4439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15209510922431946,
"step": 220,
"valid_targets_mean": 7089.7,
"valid_targets_min": 3873
},
{
"epoch": 2.1639871382636655,
"grad_norm": 0.3408414017109924,
"learning_rate": 3.4979997199454195e-05,
"loss": 0.4375,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15320946276187897,
"step": 225,
"valid_targets_mean": 7269.2,
"valid_targets_min": 2639
},
{
"epoch": 2.212218649517685,
"grad_norm": 0.30731758497614714,
"learning_rate": 3.465793007396421e-05,
"loss": 0.4348,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14076238870620728,
"step": 230,
"valid_targets_mean": 6917.7,
"valid_targets_min": 1785
},
{
"epoch": 2.260450160771704,
"grad_norm": 0.372487228314555,
"learning_rate": 3.4327433309299986e-05,
"loss": 0.4405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14385536313056946,
"step": 235,
"valid_targets_mean": 6961.8,
"valid_targets_min": 2545
},
{
"epoch": 2.3086816720257235,
"grad_norm": 0.3312561822173933,
"learning_rate": 3.398869697107517e-05,
"loss": 0.4402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.144433856010437,
"step": 240,
"valid_targets_mean": 6751.5,
"valid_targets_min": 2018
},
{
"epoch": 2.356913183279743,
"grad_norm": 0.35314218176188583,
"learning_rate": 3.3641915863405486e-05,
"loss": 0.4427,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13870275020599365,
"step": 245,
"valid_targets_mean": 7063.4,
"valid_targets_min": 2156
},
{
"epoch": 2.405144694533762,
"grad_norm": 0.2725162697916855,
"learning_rate": 3.328728941687871e-05,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14645615220069885,
"step": 250,
"valid_targets_mean": 7152.3,
"valid_targets_min": 2078
},
{
"epoch": 2.4533762057877815,
"grad_norm": 0.30613274466141416,
"learning_rate": 3.292502157386397e-05,
"loss": 0.4357,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1441894918680191,
"step": 255,
"valid_targets_mean": 7401.5,
"valid_targets_min": 2283
},
{
"epoch": 2.5016077170418005,
"grad_norm": 0.30760789389816223,
"learning_rate": 3.2555320671226405e-05,
"loss": 0.4327,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14768892526626587,
"step": 260,
"valid_targets_mean": 6794.9,
"valid_targets_min": 1650
},
{
"epoch": 2.54983922829582,
"grad_norm": 0.29539485332618615,
"learning_rate": 3.217839932051457e-05,
"loss": 0.4325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1291293054819107,
"step": 265,
"valid_targets_mean": 6756.1,
"valid_targets_min": 2595
},
{
"epoch": 2.598070739549839,
"grad_norm": 0.2844011979442231,
"learning_rate": 3.179447428568952e-05,
"loss": 0.4306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13877539336681366,
"step": 270,
"valid_targets_mean": 6815.4,
"valid_targets_min": 2346
},
{
"epoch": 2.6463022508038585,
"grad_norm": 0.2939840972561895,
"learning_rate": 3.1403766358465833e-05,
"loss": 0.4321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14215871691703796,
"step": 275,
"valid_targets_mean": 6699.9,
"valid_targets_min": 2662
},
{
"epoch": 2.694533762057878,
"grad_norm": 0.33704883535034413,
"learning_rate": 3.100650023133643e-05,
"loss": 0.4293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15095305442810059,
"step": 280,
"valid_targets_mean": 6936.8,
"valid_targets_min": 3262
},
{
"epoch": 2.742765273311897,
"grad_norm": 0.30435858181327363,
"learning_rate": 3.060290436835392e-05,
"loss": 0.434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14832210540771484,
"step": 285,
"valid_targets_mean": 7377.8,
"valid_targets_min": 3814
},
{
"epoch": 2.7909967845659165,
"grad_norm": 0.3038922987615049,
"learning_rate": 3.019321087374313e-05,
"loss": 0.4355,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1452721804380417,
"step": 290,
"valid_targets_mean": 7521.8,
"valid_targets_min": 3620
},
{
"epoch": 2.839228295819936,
"grad_norm": 0.2917713716711792,
"learning_rate": 2.977765535842007e-05,
"loss": 0.4329,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14556115865707397,
"step": 295,
"valid_targets_mean": 7321.2,
"valid_targets_min": 2823
},
{
"epoch": 2.887459807073955,
"grad_norm": 0.29939071791258476,
"learning_rate": 2.9356476804494306e-05,
"loss": 0.4286,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14382721483707428,
"step": 300,
"valid_targets_mean": 7452.7,
"valid_targets_min": 3738
},
{
"epoch": 2.935691318327974,
"grad_norm": 0.31181832384197283,
"learning_rate": 2.892991742783259e-05,
"loss": 0.4296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13677389919757843,
"step": 305,
"valid_targets_mean": 6679.7,
"valid_targets_min": 1975
},
{
"epoch": 2.9839228295819935,
"grad_norm": 0.29910249575281106,
"learning_rate": 2.8498222538762737e-05,
"loss": 0.4283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1349104940891266,
"step": 310,
"valid_targets_mean": 7041.6,
"valid_targets_min": 1534
},
{
"epoch": 3.0289389067524115,
"grad_norm": 0.30659296462604446,
"learning_rate": 2.8061640400997966e-05,
"loss": 0.4285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12650442123413086,
"step": 315,
"valid_targets_mean": 6438.3,
"valid_targets_min": 1711
},
{
"epoch": 3.077170418006431,
"grad_norm": 0.31032562331811075,
"learning_rate": 2.7620422088862736e-05,
"loss": 0.4192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13721013069152832,
"step": 320,
"valid_targets_mean": 6582.1,
"valid_targets_min": 2016
},
{
"epoch": 3.12540192926045,
"grad_norm": 0.298343154549212,
"learning_rate": 2.7174821342902234e-05,
"loss": 0.4203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1312987208366394,
"step": 325,
"valid_targets_mean": 6630.8,
"valid_targets_min": 1708
},
{
"epoch": 3.1736334405144695,
"grad_norm": 0.32270907072518445,
"learning_rate": 2.6725094423958574e-05,
"loss": 0.4257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12568901479244232,
"step": 330,
"valid_targets_mean": 6373.9,
"valid_targets_min": 2413
},
{
"epoch": 3.221864951768489,
"grad_norm": 0.33685989702154473,
"learning_rate": 2.6271499965797532e-05,
"loss": 0.4209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14049115777015686,
"step": 335,
"valid_targets_mean": 6983.8,
"valid_targets_min": 1906
},
{
"epoch": 3.270096463022508,
"grad_norm": 0.3224358346310272,
"learning_rate": 2.5814298826370702e-05,
"loss": 0.4234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1452215611934662,
"step": 340,
"valid_targets_mean": 7346.3,
"valid_targets_min": 1632
},
{
"epoch": 3.3183279742765275,
"grad_norm": 0.3190857528054468,
"learning_rate": 2.5353753937798527e-05,
"loss": 0.4197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14950251579284668,
"step": 345,
"valid_targets_mean": 7413.2,
"valid_targets_min": 3670
},
{
"epoch": 3.3665594855305465,
"grad_norm": 0.27960704518791396,
"learning_rate": 2.4890130155160427e-05,
"loss": 0.4199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14225219190120697,
"step": 350,
"valid_targets_mean": 7716.9,
"valid_targets_min": 2063
},
{
"epoch": 3.414790996784566,
"grad_norm": 0.2806138098599385,
"learning_rate": 2.4423694104179176e-05,
"loss": 0.4195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14240960776805878,
"step": 355,
"valid_targets_mean": 6942.1,
"valid_targets_min": 2201
},
{
"epoch": 3.463022508038585,
"grad_norm": 0.3184400082207043,
"learning_rate": 2.3954714027886904e-05,
"loss": 0.4137,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14079201221466064,
"step": 360,
"valid_targets_mean": 7222.4,
"valid_targets_min": 2599
},
{
"epoch": 3.5112540192926045,
"grad_norm": 0.3007136487008627,
"learning_rate": 2.3483459632361e-05,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13650323450565338,
"step": 365,
"valid_targets_mean": 6989.0,
"valid_targets_min": 3489
},
{
"epoch": 3.559485530546624,
"grad_norm": 0.3265478005060877,
"learning_rate": 2.3010201931618696e-05,
"loss": 0.4159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1319654881954193,
"step": 370,
"valid_targets_mean": 7079.9,
"valid_targets_min": 2563
},
{
"epoch": 3.607717041800643,
"grad_norm": 0.3948009132445448,
"learning_rate": 2.2535213091759404e-05,
"loss": 0.4141,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13655099272727966,
"step": 375,
"valid_targets_mean": 6693.5,
"valid_targets_min": 2016
},
{
"epoch": 3.6559485530546625,
"grad_norm": 0.3019824650671563,
"learning_rate": 2.205876627444452e-05,
"loss": 0.4213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13880327343940735,
"step": 380,
"valid_targets_mean": 7379.9,
"valid_targets_min": 2742
},
{
"epoch": 3.7041800643086815,
"grad_norm": 0.2737001441559615,
"learning_rate": 2.1581135479804735e-05,
"loss": 0.42,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14117969572544098,
"step": 385,
"valid_targets_mean": 7399.1,
"valid_targets_min": 2818
},
{
"epoch": 3.752411575562701,
"grad_norm": 0.2638773595163275,
"learning_rate": 2.1102595388865054e-05,
"loss": 0.4152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14235882461071014,
"step": 390,
"valid_targets_mean": 7069.0,
"valid_targets_min": 2084
},
{
"epoch": 3.80064308681672,
"grad_norm": 0.28040665496341327,
"learning_rate": 2.062342120557834e-05,
"loss": 0.42,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13389542698860168,
"step": 395,
"valid_targets_mean": 7465.1,
"valid_targets_min": 2377
},
{
"epoch": 3.8488745980707395,
"grad_norm": 0.31446821190054974,
"learning_rate": 2.0143888498558046e-05,
"loss": 0.415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14552685618400574,
"step": 400,
"valid_targets_mean": 7290.2,
"valid_targets_min": 2307
},
{
"epoch": 3.897106109324759,
"grad_norm": 0.279723657397229,
"learning_rate": 1.9664273042601302e-05,
"loss": 0.4243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13550975918769836,
"step": 405,
"valid_targets_mean": 6575.1,
"valid_targets_min": 3659
},
{
"epoch": 3.945337620578778,
"grad_norm": 0.30048258496006014,
"learning_rate": 1.918485066009338e-05,
"loss": 0.4175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13949520885944366,
"step": 410,
"valid_targets_mean": 6686.4,
"valid_targets_min": 2947
},
{
"epoch": 3.9935691318327975,
"grad_norm": 0.2867474230478217,
"learning_rate": 1.87058970623848e-05,
"loss": 0.4122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13645842671394348,
"step": 415,
"valid_targets_mean": 7611.5,
"valid_targets_min": 4001
},
{
"epoch": 4.038585209003215,
"grad_norm": 0.32425630072343237,
"learning_rate": 1.8227687691232322e-05,
"loss": 0.4168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14129070937633514,
"step": 420,
"valid_targets_mean": 7769.8,
"valid_targets_min": 2240
},
{
"epoch": 4.086816720257235,
"grad_norm": 0.2748205187855077,
"learning_rate": 1.7750497560394918e-05,
"loss": 0.4123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13103340566158295,
"step": 425,
"valid_targets_mean": 7042.3,
"valid_targets_min": 2721
},
{
"epoch": 4.135048231511254,
"grad_norm": 0.31593879087572435,
"learning_rate": 1.7274601097475957e-05,
"loss": 0.4148,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12848451733589172,
"step": 430,
"valid_targets_mean": 7032.7,
"valid_targets_min": 620
},
{
"epoch": 4.183279742765273,
"grad_norm": 0.2928706432235063,
"learning_rate": 1.6800271986102418e-05,
"loss": 0.4111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13245472311973572,
"step": 435,
"valid_targets_mean": 6995.4,
"valid_targets_min": 2509
},
{
"epoch": 4.231511254019293,
"grad_norm": 0.294138005369505,
"learning_rate": 1.6327783008532e-05,
"loss": 0.4115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1609172821044922,
"step": 440,
"valid_targets_mean": 7456.9,
"valid_targets_min": 2912
},
{
"epoch": 4.279742765273312,
"grad_norm": 0.26343752394612546,
"learning_rate": 1.5857405888778568e-05,
"loss": 0.4048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13041119277477264,
"step": 445,
"valid_targets_mean": 6549.4,
"valid_targets_min": 2315
},
{
"epoch": 4.327974276527331,
"grad_norm": 0.26515284362959823,
"learning_rate": 1.5389411136346225e-05,
"loss": 0.4165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1363140195608139,
"step": 450,
"valid_targets_mean": 6877.8,
"valid_targets_min": 2060
},
{
"epoch": 4.37620578778135,
"grad_norm": 0.3341389590190934,
"learning_rate": 1.4924067890661778e-05,
"loss": 0.4085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14212492108345032,
"step": 455,
"valid_targets_mean": 7122.2,
"valid_targets_min": 2552
},
{
"epoch": 4.42443729903537,
"grad_norm": 0.305739315850788,
"learning_rate": 1.4461643766295196e-05,
"loss": 0.4115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13609100878238678,
"step": 460,
"valid_targets_mean": 7076.4,
"valid_targets_min": 2084
},
{
"epoch": 4.472668810289389,
"grad_norm": 0.3425663444220552,
"learning_rate": 1.4002404699056946e-05,
"loss": 0.4088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14678135514259338,
"step": 465,
"valid_targets_mean": 7620.5,
"valid_targets_min": 3262
},
{
"epoch": 4.520900321543408,
"grad_norm": 0.28442860046969476,
"learning_rate": 1.3546614793060757e-05,
"loss": 0.4089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12935858964920044,
"step": 470,
"valid_targets_mean": 6770.5,
"valid_targets_min": 1312
},
{
"epoch": 4.569131832797428,
"grad_norm": 0.27599575997018294,
"learning_rate": 1.3094536168839853e-05,
"loss": 0.4053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1286410689353943,
"step": 475,
"valid_targets_mean": 6730.8,
"valid_targets_min": 2512
},
{
"epoch": 4.617363344051447,
"grad_norm": 0.2847734396443323,
"learning_rate": 1.2646428812603838e-05,
"loss": 0.4039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1336403489112854,
"step": 480,
"valid_targets_mean": 6643.8,
"valid_targets_min": 616
},
{
"epoch": 4.665594855305466,
"grad_norm": 0.28455785481267665,
"learning_rate": 1.2202550426723053e-05,
"loss": 0.4041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12907546758651733,
"step": 485,
"valid_targets_mean": 6958.6,
"valid_targets_min": 1876
},
{
"epoch": 4.713826366559486,
"grad_norm": 0.2584978256745778,
"learning_rate": 1.1763156281526348e-05,
"loss": 0.4106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1278737336397171,
"step": 490,
"valid_targets_mean": 6800.9,
"valid_targets_min": 2535
},
{
"epoch": 4.762057877813505,
"grad_norm": 0.2641322558149948,
"learning_rate": 1.1328499068497478e-05,
"loss": 0.4102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1252935379743576,
"step": 495,
"valid_targets_mean": 6629.9,
"valid_targets_min": 1450
},
{
"epoch": 4.810289389067524,
"grad_norm": 0.2418848744577093,
"learning_rate": 1.0898828754954618e-05,
"loss": 0.4073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1492045819759369,
"step": 500,
"valid_targets_mean": 7692.5,
"valid_targets_min": 2454
},
{
"epoch": 4.858520900321543,
"grad_norm": 0.2609084515860585,
"learning_rate": 1.047439244029642e-05,
"loss": 0.4088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13392722606658936,
"step": 505,
"valid_targets_mean": 6826.6,
"valid_targets_min": 2617
},
{
"epoch": 4.906752411575563,
"grad_norm": 0.25321438392795964,
"learning_rate": 1.0055434213897529e-05,
"loss": 0.4047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1320946365594864,
"step": 510,
"valid_targets_mean": 7162.3,
"valid_targets_min": 2273
},
{
"epoch": 4.954983922829582,
"grad_norm": 0.2717500389466323,
"learning_rate": 9.642195014734972e-06,
"loss": 0.4104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1381416916847229,
"step": 515,
"valid_targets_mean": 7403.7,
"valid_targets_min": 3487
},
{
"epoch": 5.0,
"grad_norm": 0.30446110389596137,
"learning_rate": 9.234912492826454e-06,
"loss": 0.4062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2185996174812317,
"step": 520,
"valid_targets_mean": 7307.2,
"valid_targets_min": 2603
},
{
"epoch": 5.048231511254019,
"grad_norm": 0.2607207892888904,
"learning_rate": 8.833820872560035e-06,
"loss": 0.404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12400539219379425,
"step": 525,
"valid_targets_mean": 6264.1,
"valid_targets_min": 1360
},
{
"epoch": 5.096463022508039,
"grad_norm": 0.2449802169204654,
"learning_rate": 8.439150817993836e-06,
"loss": 0.4039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13310736417770386,
"step": 530,
"valid_targets_mean": 7255.7,
"valid_targets_min": 3579
},
{
"epoch": 5.144694533762058,
"grad_norm": 0.26180442969567413,
"learning_rate": 8.051129300203324e-06,
"loss": 0.4042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13548702001571655,
"step": 535,
"valid_targets_mean": 7463.6,
"valid_targets_min": 2512
},
{
"epoch": 5.192926045016077,
"grad_norm": 0.2536175353972523,
"learning_rate": 7.669979466752322e-06,
"loss": 0.4096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14918869733810425,
"step": 540,
"valid_targets_mean": 7186.4,
"valid_targets_min": 1709
},
{
"epoch": 5.241157556270096,
"grad_norm": 0.29261182327763363,
"learning_rate": 7.295920513362957e-06,
"loss": 0.4025,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14041003584861755,
"step": 545,
"valid_targets_mean": 7639.5,
"valid_targets_min": 3656
},
{
"epoch": 5.289389067524116,
"grad_norm": 0.24399791711337562,
"learning_rate": 6.92916755785821e-06,
"loss": 0.4075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13997626304626465,
"step": 550,
"valid_targets_mean": 6806.3,
"valid_targets_min": 2858
},
{
"epoch": 5.337620578778135,
"grad_norm": 0.24929427616550545,
"learning_rate": 6.5699315164496635e-06,
"loss": 0.4026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1325225830078125,
"step": 555,
"valid_targets_mean": 6992.0,
"valid_targets_min": 784
},
{
"epoch": 5.385852090032154,
"grad_norm": 0.26276669481415915,
"learning_rate": 6.2184189824415855e-06,
"loss": 0.4059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13438841700553894,
"step": 560,
"valid_targets_mean": 7446.2,
"valid_targets_min": 3927
},
{
"epoch": 5.434083601286174,
"grad_norm": 0.2402023567433913,
"learning_rate": 5.87483210742098e-06,
"loss": 0.407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1273195445537567,
"step": 565,
"valid_targets_mean": 6891.7,
"valid_targets_min": 2511
},
{
"epoch": 5.482315112540193,
"grad_norm": 0.24563573813966255,
"learning_rate": 5.539368485002161e-06,
"loss": 0.4,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14178407192230225,
"step": 570,
"valid_targets_mean": 7711.9,
"valid_targets_min": 2782
},
{
"epoch": 5.530546623794212,
"grad_norm": 0.2469427497751035,
"learning_rate": 5.21222103719244e-06,
"loss": 0.4055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1396293342113495,
"step": 575,
"valid_targets_mean": 6963.2,
"valid_targets_min": 2701
},
{
"epoch": 5.578778135048232,
"grad_norm": 0.23786230223199137,
"learning_rate": 4.893577903444524e-06,
"loss": 0.4023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12411302328109741,
"step": 580,
"valid_targets_mean": 6821.6,
"valid_targets_min": 2945
},
{
"epoch": 5.627009646302251,
"grad_norm": 0.24637334341544995,
"learning_rate": 4.58362233245923e-06,
"loss": 0.3995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14157408475875854,
"step": 585,
"valid_targets_mean": 7340.6,
"valid_targets_min": 2735
},
{
"epoch": 5.67524115755627,
"grad_norm": 0.24411500634429661,
"learning_rate": 4.2825325768008905e-06,
"loss": 0.4042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1333349496126175,
"step": 590,
"valid_targets_mean": 7271.8,
"valid_targets_min": 2419
},
{
"epoch": 5.723472668810289,
"grad_norm": 0.23800985861462287,
"learning_rate": 3.990481790385963e-06,
"loss": 0.4002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12876483798027039,
"step": 595,
"valid_targets_mean": 6977.3,
"valid_targets_min": 1671
},
{
"epoch": 5.771704180064309,
"grad_norm": 0.24538130692836058,
"learning_rate": 3.7076379289037755e-06,
"loss": 0.4069,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12505599856376648,
"step": 600,
"valid_targets_mean": 6450.1,
"valid_targets_min": 2535
},
{
"epoch": 5.819935691318328,
"grad_norm": 0.2281308139817817,
"learning_rate": 3.4341636532268476e-06,
"loss": 0.399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14104759693145752,
"step": 605,
"valid_targets_mean": 7660.5,
"valid_targets_min": 2583
},
{
"epoch": 5.868167202572347,
"grad_norm": 0.22646861694596795,
"learning_rate": 3.170216235866075e-06,
"loss": 0.403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1284317672252655,
"step": 610,
"valid_targets_mean": 7120.9,
"valid_targets_min": 2428
},
{
"epoch": 5.916398713826366,
"grad_norm": 0.23622946188916152,
"learning_rate": 2.9159474705248093e-06,
"loss": 0.4056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1416313201189041,
"step": 615,
"valid_targets_mean": 7431.3,
"valid_targets_min": 3551
},
{
"epoch": 5.964630225080386,
"grad_norm": 0.2415011553269361,
"learning_rate": 2.6715035848036962e-06,
"loss": 0.407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14078471064567566,
"step": 620,
"valid_targets_mean": 7541.7,
"valid_targets_min": 3135
},
{
"epoch": 6.009646302250804,
"grad_norm": 0.2988974462268002,
"learning_rate": 2.4370251561065363e-06,
"loss": 0.4043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1267852485179901,
"step": 625,
"valid_targets_mean": 7117.3,
"valid_targets_min": 2271
},
{
"epoch": 6.057877813504823,
"grad_norm": 0.22684489644445052,
"learning_rate": 2.2126470307955515e-06,
"loss": 0.4,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14647862315177917,
"step": 630,
"valid_targets_mean": 7404.9,
"valid_targets_min": 3196
},
{
"epoch": 6.106109324758842,
"grad_norm": 0.2318713634251901,
"learning_rate": 1.998498246642464e-06,
"loss": 0.4046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14780129492282867,
"step": 635,
"valid_targets_mean": 7828.9,
"valid_targets_min": 2325
},
{
"epoch": 6.154340836012862,
"grad_norm": 0.2517163898615774,
"learning_rate": 1.7947019586201152e-06,
"loss": 0.402,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14310380816459656,
"step": 640,
"valid_targets_mean": 7404.0,
"valid_targets_min": 2411
},
{
"epoch": 6.202572347266881,
"grad_norm": 0.23081203575236892,
"learning_rate": 1.6013753680771493e-06,
"loss": 0.4019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14563211798667908,
"step": 645,
"valid_targets_mean": 7396.6,
"valid_targets_min": 688
},
{
"epoch": 6.2508038585209,
"grad_norm": 0.232476920350471,
"learning_rate": 1.4186296553366274e-06,
"loss": 0.4073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13480687141418457,
"step": 650,
"valid_targets_mean": 7193.0,
"valid_targets_min": 2283
},
{
"epoch": 6.29903536977492,
"grad_norm": 0.32208230826818174,
"learning_rate": 1.246569915757263e-06,
"loss": 0.4038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1413307934999466,
"step": 655,
"valid_targets_mean": 6981.1,
"valid_targets_min": 1723
},
{
"epoch": 6.347266881028939,
"grad_norm": 0.24094886248637287,
"learning_rate": 1.0852950992940415e-06,
"loss": 0.4024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1421469748020172,
"step": 660,
"valid_targets_mean": 7026.6,
"valid_targets_min": 3719
},
{
"epoch": 6.395498392282958,
"grad_norm": 0.2336825012086835,
"learning_rate": 9.348979535930391e-07,
"loss": 0.3998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14628173410892487,
"step": 665,
"valid_targets_mean": 7186.3,
"valid_targets_min": 2563
},
{
"epoch": 6.443729903536978,
"grad_norm": 0.23225672605953232,
"learning_rate": 7.95464970653106e-07,
"loss": 0.4027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14211036264896393,
"step": 670,
"valid_targets_mean": 7271.8,
"valid_targets_min": 1463
},
{
"epoch": 6.491961414790997,
"grad_norm": 0.22536427823739902,
"learning_rate": 6.670763370851241e-07,
"loss": 0.3971,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13998502492904663,
"step": 675,
"valid_targets_mean": 7128.0,
"valid_targets_min": 2704
},
{
"epoch": 6.540192926045016,
"grad_norm": 0.23238632103019738,
"learning_rate": 5.4980588799743e-07,
"loss": 0.3984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13454963266849518,
"step": 680,
"valid_targets_mean": 6974.1,
"valid_targets_min": 3551
},
{
"epoch": 6.588424437299035,
"grad_norm": 0.22827166361639628,
"learning_rate": 4.4372106453394405e-07,
"loss": 0.4015,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11654944717884064,
"step": 685,
"valid_targets_mean": 6038.2,
"valid_targets_min": 828
},
{
"epoch": 6.636655948553055,
"grad_norm": 0.21370459518374998,
"learning_rate": 3.48882875089378e-07,
"loss": 0.3977,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12826097011566162,
"step": 690,
"valid_targets_mean": 6884.0,
"valid_targets_min": 2654
},
{
"epoch": 6.684887459807074,
"grad_norm": 0.22138782542292237,
"learning_rate": 2.653458602238845e-07,
"loss": 0.407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12427161633968353,
"step": 695,
"valid_targets_mean": 6643.1,
"valid_targets_min": 2901
},
{
"epoch": 6.733118971061093,
"grad_norm": 0.21954025300724495,
"learning_rate": 1.931580612972983e-07,
"loss": 0.4057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13527044653892517,
"step": 700,
"valid_targets_mean": 6924.3,
"valid_targets_min": 2375
},
{
"epoch": 6.781350482315112,
"grad_norm": 0.22594474295726047,
"learning_rate": 1.3236099284097415e-07,
"loss": 0.4041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14596149325370789,
"step": 705,
"valid_targets_mean": 7521.6,
"valid_targets_min": 2823
},
{
"epoch": 6.829581993569132,
"grad_norm": 0.2337034847019303,
"learning_rate": 8.298961868318689e-08,
"loss": 0.3964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1245846375823021,
"step": 710,
"valid_targets_mean": 6552.6,
"valid_targets_min": 1658
},
{
"epoch": 6.877813504823151,
"grad_norm": 0.22405182437817006,
"learning_rate": 4.507233184174675e-08,
"loss": 0.4001,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12990376353263855,
"step": 715,
"valid_targets_mean": 7562.9,
"valid_targets_min": 3556
},
{
"epoch": 6.92604501607717,
"grad_norm": 0.22599182393237854,
"learning_rate": 1.863093819545192e-08,
"loss": 0.3993,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13830092549324036,
"step": 720,
"valid_targets_mean": 7532.1,
"valid_targets_min": 3375
},
{
"epoch": 6.97427652733119,
"grad_norm": 0.22933841268273067,
"learning_rate": 3.680643943708706e-09,
"loss": 0.4013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13168975710868835,
"step": 725,
"valid_targets_mean": 7441.9,
"valid_targets_min": 3674
},
{
"epoch": 7.0,
"step": 728,
"total_flos": 2.6866574226798674e+18,
"train_loss": 0.0,
"train_runtime": 0.825,
"train_samples_per_second": 84297.403,
"train_steps_per_second": 882.429
}
],
"logging_steps": 5,
"max_steps": 728,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6866574226798674e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}