Model: laion/sft__stackexchange-tezos-sandboxes__Kimi-2-5-smaxeps-32k__Qwen3-8B Source: Original Platform
1434 lines
39 KiB
JSON
1434 lines
39 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 630,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.05555555555555555,
|
|
"grad_norm": 19.037475031095614,
|
|
"learning_rate": 2.53968253968254e-06,
|
|
"loss": 0.9975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3319109082221985,
|
|
"step": 5,
|
|
"valid_targets_mean": 3084.8,
|
|
"valid_targets_min": 330
|
|
},
|
|
{
|
|
"epoch": 0.1111111111111111,
|
|
"grad_norm": 10.063492319722197,
|
|
"learning_rate": 5.7142857142857145e-06,
|
|
"loss": 0.9275,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.3317878544330597,
|
|
"step": 10,
|
|
"valid_targets_mean": 4345.8,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 2.3475297969942397,
|
|
"learning_rate": 8.888888888888888e-06,
|
|
"loss": 0.7902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24648834764957428,
|
|
"step": 15,
|
|
"valid_targets_mean": 4131.6,
|
|
"valid_targets_min": 1216
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"grad_norm": 1.476580677728524,
|
|
"learning_rate": 1.2063492063492064e-05,
|
|
"loss": 0.7268,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21647010743618011,
|
|
"step": 20,
|
|
"valid_targets_mean": 3768.8,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 0.2777777777777778,
|
|
"grad_norm": 0.9495836410332217,
|
|
"learning_rate": 1.523809523809524e-05,
|
|
"loss": 0.6872,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22024014592170715,
|
|
"step": 25,
|
|
"valid_targets_mean": 3784.8,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 0.6124846458887409,
|
|
"learning_rate": 1.8412698412698415e-05,
|
|
"loss": 0.6531,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20419517159461975,
|
|
"step": 30,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 0.3888888888888889,
|
|
"grad_norm": 0.5901105201493042,
|
|
"learning_rate": 2.158730158730159e-05,
|
|
"loss": 0.6291,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20678561925888062,
|
|
"step": 35,
|
|
"valid_targets_mean": 3448.4,
|
|
"valid_targets_min": 309
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"grad_norm": 0.4804068677241246,
|
|
"learning_rate": 2.4761904761904766e-05,
|
|
"loss": 0.6125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1943480521440506,
|
|
"step": 40,
|
|
"valid_targets_mean": 3677.9,
|
|
"valid_targets_min": 375
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 0.4044840533678469,
|
|
"learning_rate": 2.7936507936507936e-05,
|
|
"loss": 0.5954,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2216871678829193,
|
|
"step": 45,
|
|
"valid_targets_mean": 3833.2,
|
|
"valid_targets_min": 420
|
|
},
|
|
{
|
|
"epoch": 0.5555555555555556,
|
|
"grad_norm": 0.37401717077276986,
|
|
"learning_rate": 3.111111111111112e-05,
|
|
"loss": 0.5695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21114522218704224,
|
|
"step": 50,
|
|
"valid_targets_mean": 4082.9,
|
|
"valid_targets_min": 394
|
|
},
|
|
{
|
|
"epoch": 0.6111111111111112,
|
|
"grad_norm": 0.38366050666143015,
|
|
"learning_rate": 3.4285714285714284e-05,
|
|
"loss": 0.546,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18125328421592712,
|
|
"step": 55,
|
|
"valid_targets_mean": 4694.8,
|
|
"valid_targets_min": 1228
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 0.3705526274838359,
|
|
"learning_rate": 3.7460317460317464e-05,
|
|
"loss": 0.5496,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1779080033302307,
|
|
"step": 60,
|
|
"valid_targets_mean": 3558.2,
|
|
"valid_targets_min": 405
|
|
},
|
|
{
|
|
"epoch": 0.7222222222222222,
|
|
"grad_norm": 0.3543083921384607,
|
|
"learning_rate": 3.9999693004141615e-05,
|
|
"loss": 0.5259,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17807340621948242,
|
|
"step": 65,
|
|
"valid_targets_mean": 4056.4,
|
|
"valid_targets_min": 363
|
|
},
|
|
{
|
|
"epoch": 0.7777777777777778,
|
|
"grad_norm": 0.3961065249343112,
|
|
"learning_rate": 3.998894913865352e-05,
|
|
"loss": 0.5241,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18150421977043152,
|
|
"step": 70,
|
|
"valid_targets_mean": 3501.2,
|
|
"valid_targets_min": 477
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.34959954993974884,
|
|
"learning_rate": 3.9962864903591375e-05,
|
|
"loss": 0.5204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14983993768692017,
|
|
"step": 75,
|
|
"valid_targets_mean": 2969.7,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"grad_norm": 0.3074663242298868,
|
|
"learning_rate": 3.992146031710637e-05,
|
|
"loss": 0.5089,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1618027687072754,
|
|
"step": 80,
|
|
"valid_targets_mean": 3615.1,
|
|
"valid_targets_min": 554
|
|
},
|
|
{
|
|
"epoch": 0.9444444444444444,
|
|
"grad_norm": 0.37613593537441964,
|
|
"learning_rate": 3.9864767154838864e-05,
|
|
"loss": 0.4974,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15451735258102417,
|
|
"step": 85,
|
|
"valid_targets_mean": 3800.1,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 0.34297142040995393,
|
|
"learning_rate": 3.9792828925532376e-05,
|
|
"loss": 0.4945,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1645517349243164,
|
|
"step": 90,
|
|
"valid_targets_mean": 3238.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 1.0555555555555556,
|
|
"grad_norm": 0.35120256454353804,
|
|
"learning_rate": 3.970570083764316e-05,
|
|
"loss": 0.48,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15994709730148315,
|
|
"step": 95,
|
|
"valid_targets_mean": 3860.9,
|
|
"valid_targets_min": 472
|
|
},
|
|
{
|
|
"epoch": 1.1111111111111112,
|
|
"grad_norm": 0.3323790661514736,
|
|
"learning_rate": 3.9603449756970877e-05,
|
|
"loss": 0.4807,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1775439828634262,
|
|
"step": 100,
|
|
"valid_targets_mean": 4234.1,
|
|
"valid_targets_min": 1927
|
|
},
|
|
{
|
|
"epoch": 1.1666666666666667,
|
|
"grad_norm": 0.31504289803726854,
|
|
"learning_rate": 3.948615415534294e-05,
|
|
"loss": 0.479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1668548285961151,
|
|
"step": 105,
|
|
"valid_targets_mean": 3562.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 1.2222222222222223,
|
|
"grad_norm": 0.321625057142259,
|
|
"learning_rate": 3.9353904050391874e-05,
|
|
"loss": 0.4686,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15156182646751404,
|
|
"step": 110,
|
|
"valid_targets_mean": 4068.0,
|
|
"valid_targets_min": 1105
|
|
},
|
|
{
|
|
"epoch": 1.2777777777777777,
|
|
"grad_norm": 0.3226596300442187,
|
|
"learning_rate": 3.9206800936472e-05,
|
|
"loss": 0.4699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14588865637779236,
|
|
"step": 115,
|
|
"valid_targets_mean": 3658.9,
|
|
"valid_targets_min": 268
|
|
},
|
|
{
|
|
"epoch": 1.3333333333333333,
|
|
"grad_norm": 0.36413926771355065,
|
|
"learning_rate": 3.904495770676831e-05,
|
|
"loss": 0.4818,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15858052670955658,
|
|
"step": 120,
|
|
"valid_targets_mean": 4093.0,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 1.3888888888888888,
|
|
"grad_norm": 0.3641062040000603,
|
|
"learning_rate": 3.886849856665746e-05,
|
|
"loss": 0.468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15991882979869843,
|
|
"step": 125,
|
|
"valid_targets_mean": 3447.6,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 1.4444444444444444,
|
|
"grad_norm": 0.34910971297050825,
|
|
"learning_rate": 3.8677558938387276e-05,
|
|
"loss": 0.4596,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13589301705360413,
|
|
"step": 130,
|
|
"valid_targets_mean": 3486.2,
|
|
"valid_targets_min": 253
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 0.3421823874745836,
|
|
"learning_rate": 3.8472285357147966e-05,
|
|
"loss": 0.465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13018383085727692,
|
|
"step": 135,
|
|
"valid_targets_mean": 3037.4,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 1.5555555555555556,
|
|
"grad_norm": 0.33111356038555095,
|
|
"learning_rate": 3.825283535861476e-05,
|
|
"loss": 0.461,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15683633089065552,
|
|
"step": 140,
|
|
"valid_targets_mean": 3742.4,
|
|
"valid_targets_min": 262
|
|
},
|
|
{
|
|
"epoch": 1.6111111111111112,
|
|
"grad_norm": 0.32069041745575666,
|
|
"learning_rate": 3.801937735804838e-05,
|
|
"loss": 0.4573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15576741099357605,
|
|
"step": 145,
|
|
"valid_targets_mean": 4067.6,
|
|
"valid_targets_min": 1037
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.33546640012641216,
|
|
"learning_rate": 3.777209052104598e-05,
|
|
"loss": 0.4602,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1551869809627533,
|
|
"step": 150,
|
|
"valid_targets_mean": 3980.7,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 1.7222222222222223,
|
|
"grad_norm": 0.3363492039248712,
|
|
"learning_rate": 3.7511164626041823e-05,
|
|
"loss": 0.455,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1573401391506195,
|
|
"step": 155,
|
|
"valid_targets_mean": 3769.0,
|
|
"valid_targets_min": 524
|
|
},
|
|
{
|
|
"epoch": 1.7777777777777777,
|
|
"grad_norm": 0.35299852743300064,
|
|
"learning_rate": 3.7236799918663284e-05,
|
|
"loss": 0.4544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.17174802720546722,
|
|
"step": 160,
|
|
"valid_targets_mean": 4081.9,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.8333333333333335,
|
|
"grad_norm": 0.3253099455609108,
|
|
"learning_rate": 3.6949206958053825e-05,
|
|
"loss": 0.4473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16645416617393494,
|
|
"step": 165,
|
|
"valid_targets_mean": 3841.2,
|
|
"valid_targets_min": 382
|
|
},
|
|
{
|
|
"epoch": 1.8888888888888888,
|
|
"grad_norm": 0.33812215316215416,
|
|
"learning_rate": 3.6648606455280944e-05,
|
|
"loss": 0.4503,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15572044253349304,
|
|
"step": 170,
|
|
"valid_targets_mean": 3896.2,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.9444444444444444,
|
|
"grad_norm": 0.37673915528072716,
|
|
"learning_rate": 3.633522910395314e-05,
|
|
"loss": 0.4467,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1881042718887329,
|
|
"step": 175,
|
|
"valid_targets_mean": 4453.6,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 0.3407164964866703,
|
|
"learning_rate": 3.6009315403175786e-05,
|
|
"loss": 0.4529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13232126832008362,
|
|
"step": 180,
|
|
"valid_targets_mean": 3291.1,
|
|
"valid_targets_min": 404
|
|
},
|
|
{
|
|
"epoch": 2.0555555555555554,
|
|
"grad_norm": 0.3413029488652493,
|
|
"learning_rate": 3.567111547298194e-05,
|
|
"loss": 0.4301,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1630811244249344,
|
|
"step": 185,
|
|
"valid_targets_mean": 4086.1,
|
|
"valid_targets_min": 1557
|
|
},
|
|
{
|
|
"epoch": 2.111111111111111,
|
|
"grad_norm": 0.3811750081248158,
|
|
"learning_rate": 3.532088886237956e-05,
|
|
"loss": 0.4315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12379217147827148,
|
|
"step": 190,
|
|
"valid_targets_mean": 2931.6,
|
|
"valid_targets_min": 313
|
|
},
|
|
{
|
|
"epoch": 2.1666666666666665,
|
|
"grad_norm": 0.358571537291706,
|
|
"learning_rate": 3.495890435016258e-05,
|
|
"loss": 0.4422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14856354892253876,
|
|
"step": 195,
|
|
"valid_targets_mean": 3480.2,
|
|
"valid_targets_min": 145
|
|
},
|
|
{
|
|
"epoch": 2.2222222222222223,
|
|
"grad_norm": 0.3737950299125905,
|
|
"learning_rate": 3.458543973863859e-05,
|
|
"loss": 0.4329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16106510162353516,
|
|
"step": 200,
|
|
"valid_targets_mean": 4051.9,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 2.2777777777777777,
|
|
"grad_norm": 0.3432433293662255,
|
|
"learning_rate": 3.420078164043161e-05,
|
|
"loss": 0.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14783862233161926,
|
|
"step": 205,
|
|
"valid_targets_mean": 3560.0,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 2.3333333333333335,
|
|
"grad_norm": 0.3430920927042314,
|
|
"learning_rate": 3.38052252585233e-05,
|
|
"loss": 0.4226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14196516573429108,
|
|
"step": 210,
|
|
"valid_targets_mean": 3396.0,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 2.388888888888889,
|
|
"grad_norm": 0.38863253659695013,
|
|
"learning_rate": 3.339907415970168e-05,
|
|
"loss": 0.4235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13569079339504242,
|
|
"step": 215,
|
|
"valid_targets_mean": 3621.8,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 2.4444444444444446,
|
|
"grad_norm": 0.321929890810943,
|
|
"learning_rate": 3.298264004159104e-05,
|
|
"loss": 0.4206,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14095836877822876,
|
|
"step": 220,
|
|
"valid_targets_mean": 3712.3,
|
|
"valid_targets_min": 461
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.3529153108569068,
|
|
"learning_rate": 3.255624249344198e-05,
|
|
"loss": 0.4243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12544581294059753,
|
|
"step": 225,
|
|
"valid_targets_mean": 3365.8,
|
|
"valid_targets_min": 98
|
|
},
|
|
{
|
|
"epoch": 2.5555555555555554,
|
|
"grad_norm": 0.34457099540336644,
|
|
"learning_rate": 3.212020875086495e-05,
|
|
"loss": 0.4272,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14480704069137573,
|
|
"step": 230,
|
|
"valid_targets_mean": 3387.6,
|
|
"valid_targets_min": 595
|
|
},
|
|
{
|
|
"epoch": 2.611111111111111,
|
|
"grad_norm": 0.3455832583683071,
|
|
"learning_rate": 3.1674873444695804e-05,
|
|
"loss": 0.4347,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14685365557670593,
|
|
"step": 235,
|
|
"valid_targets_mean": 3451.9,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 2.6666666666666665,
|
|
"grad_norm": 0.3215500471418138,
|
|
"learning_rate": 3.122057834418582e-05,
|
|
"loss": 0.422,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1365378350019455,
|
|
"step": 240,
|
|
"valid_targets_mean": 3914.4,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 2.7222222222222223,
|
|
"grad_norm": 0.34593025741903677,
|
|
"learning_rate": 3.075767209471345e-05,
|
|
"loss": 0.4147,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12988005578517914,
|
|
"step": 245,
|
|
"valid_targets_mean": 3272.9,
|
|
"valid_targets_min": 475
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 0.34062328817660814,
|
|
"learning_rate": 3.0286509950219077e-05,
|
|
"loss": 0.4207,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1327398270368576,
|
|
"step": 250,
|
|
"valid_targets_mean": 3390.3,
|
|
"valid_targets_min": 260
|
|
},
|
|
{
|
|
"epoch": 2.8333333333333335,
|
|
"grad_norm": 0.3357111436567365,
|
|
"learning_rate": 2.9807453500567937e-05,
|
|
"loss": 0.4165,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14951291680335999,
|
|
"step": 255,
|
|
"valid_targets_mean": 4245.5,
|
|
"valid_targets_min": 1302
|
|
},
|
|
{
|
|
"epoch": 2.888888888888889,
|
|
"grad_norm": 0.34569763385162866,
|
|
"learning_rate": 2.9320870394050783e-05,
|
|
"loss": 0.426,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14805033802986145,
|
|
"step": 260,
|
|
"valid_targets_mean": 3933.6,
|
|
"valid_targets_min": 325
|
|
},
|
|
{
|
|
"epoch": 2.9444444444444446,
|
|
"grad_norm": 0.34420513950314197,
|
|
"learning_rate": 2.8827134055234883e-05,
|
|
"loss": 0.4227,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14399433135986328,
|
|
"step": 265,
|
|
"valid_targets_mean": 3832.5,
|
|
"valid_targets_min": 1034
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 0.3983608305380748,
|
|
"learning_rate": 2.8326623398382174e-05,
|
|
"loss": 0.4186,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14143314957618713,
|
|
"step": 270,
|
|
"valid_targets_mean": 4202.6,
|
|
"valid_targets_min": 1467
|
|
},
|
|
{
|
|
"epoch": 3.0555555555555554,
|
|
"grad_norm": 0.35299028016710193,
|
|
"learning_rate": 2.781972253665431e-05,
|
|
"loss": 0.4132,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12317074835300446,
|
|
"step": 275,
|
|
"valid_targets_mean": 3195.2,
|
|
"valid_targets_min": 460
|
|
},
|
|
{
|
|
"epoch": 3.111111111111111,
|
|
"grad_norm": 0.3376366411188975,
|
|
"learning_rate": 2.7306820487327906e-05,
|
|
"loss": 0.4058,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14129731059074402,
|
|
"step": 280,
|
|
"valid_targets_mean": 3994.2,
|
|
"valid_targets_min": 909
|
|
},
|
|
{
|
|
"epoch": 3.1666666666666665,
|
|
"grad_norm": 0.35420688321944427,
|
|
"learning_rate": 2.6788310873246133e-05,
|
|
"loss": 0.4037,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1481894999742508,
|
|
"step": 285,
|
|
"valid_targets_mean": 3624.8,
|
|
"valid_targets_min": 186
|
|
},
|
|
{
|
|
"epoch": 3.2222222222222223,
|
|
"grad_norm": 0.33884247477237095,
|
|
"learning_rate": 2.62645916207358e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818801403045654,
|
|
"step": 290,
|
|
"valid_targets_mean": 3634.7,
|
|
"valid_targets_min": 406
|
|
},
|
|
{
|
|
"epoch": 3.2777777777777777,
|
|
"grad_norm": 0.35808314122646906,
|
|
"learning_rate": 2.5736064654221808e-05,
|
|
"loss": 0.4078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14692871272563934,
|
|
"step": 295,
|
|
"valid_targets_mean": 3957.3,
|
|
"valid_targets_min": 814
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.3360689997001178,
|
|
"learning_rate": 2.5203135587773196e-05,
|
|
"loss": 0.4064,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13141655921936035,
|
|
"step": 300,
|
|
"valid_targets_mean": 3711.9,
|
|
"valid_targets_min": 408
|
|
},
|
|
{
|
|
"epoch": 3.388888888888889,
|
|
"grad_norm": 0.34466387998125586,
|
|
"learning_rate": 2.4666213413817696e-05,
|
|
"loss": 0.4022,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1474984586238861,
|
|
"step": 305,
|
|
"valid_targets_mean": 4172.6,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 3.4444444444444446,
|
|
"grad_norm": 0.3292600704344278,
|
|
"learning_rate": 2.4125710189263555e-05,
|
|
"loss": 0.4,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13848447799682617,
|
|
"step": 310,
|
|
"valid_targets_mean": 4131.7,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 3.5,
|
|
"grad_norm": 0.3522048413936891,
|
|
"learning_rate": 2.3582040719269504e-05,
|
|
"loss": 0.3979,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1349484920501709,
|
|
"step": 315,
|
|
"valid_targets_mean": 3562.1,
|
|
"valid_targets_min": 337
|
|
},
|
|
{
|
|
"epoch": 3.5555555555555554,
|
|
"grad_norm": 0.353827674173661,
|
|
"learning_rate": 2.3035622238905694e-05,
|
|
"loss": 0.404,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13351227343082428,
|
|
"step": 320,
|
|
"valid_targets_mean": 3304.5,
|
|
"valid_targets_min": 462
|
|
},
|
|
{
|
|
"epoch": 3.611111111111111,
|
|
"grad_norm": 0.36329667172881713,
|
|
"learning_rate": 2.2486874092949708e-05,
|
|
"loss": 0.4026,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12374818325042725,
|
|
"step": 325,
|
|
"valid_targets_mean": 3150.0,
|
|
"valid_targets_min": 431
|
|
},
|
|
{
|
|
"epoch": 3.6666666666666665,
|
|
"grad_norm": 0.34975906106997023,
|
|
"learning_rate": 2.1936217414063584e-05,
|
|
"loss": 0.4045,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11291907727718353,
|
|
"step": 330,
|
|
"valid_targets_mean": 3368.1,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.7222222222222223,
|
|
"grad_norm": 0.3262337332472019,
|
|
"learning_rate": 2.138407479959869e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11993084847927094,
|
|
"step": 335,
|
|
"valid_targets_mean": 3319.6,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 3.7777777777777777,
|
|
"grad_norm": 0.3905840521353238,
|
|
"learning_rate": 2.0830869987276537e-05,
|
|
"loss": 0.3996,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12794974446296692,
|
|
"step": 340,
|
|
"valid_targets_mean": 3615.5,
|
|
"valid_targets_min": 513
|
|
},
|
|
{
|
|
"epoch": 3.8333333333333335,
|
|
"grad_norm": 0.3767804784651145,
|
|
"learning_rate": 2.027702752999444e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13156664371490479,
|
|
"step": 345,
|
|
"valid_targets_mean": 3314.1,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 3.888888888888889,
|
|
"grad_norm": 0.4004500617710589,
|
|
"learning_rate": 1.9722972470005573e-05,
|
|
"loss": 0.4019,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16676589846611023,
|
|
"step": 350,
|
|
"valid_targets_mean": 4194.2,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 3.9444444444444446,
|
|
"grad_norm": 0.3354819345422965,
|
|
"learning_rate": 1.916913001272347e-05,
|
|
"loss": 0.4015,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294766306877136,
|
|
"step": 355,
|
|
"valid_targets_mean": 4012.5,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"grad_norm": 0.3505665200641073,
|
|
"learning_rate": 1.8615925200401318e-05,
|
|
"loss": 0.4046,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12504121661186218,
|
|
"step": 360,
|
|
"valid_targets_mean": 3056.0,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 4.055555555555555,
|
|
"grad_norm": 0.3400952724809926,
|
|
"learning_rate": 1.806378258593642e-05,
|
|
"loss": 0.4018,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11445639282464981,
|
|
"step": 365,
|
|
"valid_targets_mean": 3330.9,
|
|
"valid_targets_min": 150
|
|
},
|
|
{
|
|
"epoch": 4.111111111111111,
|
|
"grad_norm": 0.3430283248301176,
|
|
"learning_rate": 1.7513125907050302e-05,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12557926774024963,
|
|
"step": 370,
|
|
"valid_targets_mean": 4006.9,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.330963547175682,
|
|
"learning_rate": 1.6964377761094313e-05,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276659518480301,
|
|
"step": 375,
|
|
"valid_targets_mean": 4130.3,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 4.222222222222222,
|
|
"grad_norm": 0.33066245321257237,
|
|
"learning_rate": 1.6417959280730506e-05,
|
|
"loss": 0.3956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1549251228570938,
|
|
"step": 380,
|
|
"valid_targets_mean": 4253.3,
|
|
"valid_targets_min": 690
|
|
},
|
|
{
|
|
"epoch": 4.277777777777778,
|
|
"grad_norm": 0.33770896122425714,
|
|
"learning_rate": 1.5874289810736452e-05,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12555822730064392,
|
|
"step": 385,
|
|
"valid_targets_mean": 3684.2,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 4.333333333333333,
|
|
"grad_norm": 0.3308154242663407,
|
|
"learning_rate": 1.5333786586182308e-05,
|
|
"loss": 0.3886,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1105135902762413,
|
|
"step": 390,
|
|
"valid_targets_mean": 3509.6,
|
|
"valid_targets_min": 206
|
|
},
|
|
{
|
|
"epoch": 4.388888888888889,
|
|
"grad_norm": 0.34154761675847767,
|
|
"learning_rate": 1.4796864412226812e-05,
|
|
"loss": 0.3912,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12594228982925415,
|
|
"step": 395,
|
|
"valid_targets_mean": 3624.0,
|
|
"valid_targets_min": 173
|
|
},
|
|
{
|
|
"epoch": 4.444444444444445,
|
|
"grad_norm": 0.34703014372739505,
|
|
"learning_rate": 1.4263935345778202e-05,
|
|
"loss": 0.3885,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11487920582294464,
|
|
"step": 400,
|
|
"valid_targets_mean": 3166.8,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 4.5,
|
|
"grad_norm": 0.33486183447383105,
|
|
"learning_rate": 1.37354083792642e-05,
|
|
"loss": 0.3939,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1340012103319168,
|
|
"step": 405,
|
|
"valid_targets_mean": 3674.7,
|
|
"valid_targets_min": 1323
|
|
},
|
|
{
|
|
"epoch": 4.555555555555555,
|
|
"grad_norm": 0.3280361715448883,
|
|
"learning_rate": 1.3211689126753879e-05,
|
|
"loss": 0.3893,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13589763641357422,
|
|
"step": 410,
|
|
"valid_targets_mean": 4136.9,
|
|
"valid_targets_min": 402
|
|
},
|
|
{
|
|
"epoch": 4.611111111111111,
|
|
"grad_norm": 0.3304314990276744,
|
|
"learning_rate": 1.26931795126721e-05,
|
|
"loss": 0.3932,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13184812664985657,
|
|
"step": 415,
|
|
"valid_targets_mean": 4020.1,
|
|
"valid_targets_min": 596
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 0.36705421176651976,
|
|
"learning_rate": 1.2180277463345697e-05,
|
|
"loss": 0.3825,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12759140133857727,
|
|
"step": 420,
|
|
"valid_targets_mean": 3406.6,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 4.722222222222222,
|
|
"grad_norm": 0.37079333963641453,
|
|
"learning_rate": 1.167337660161783e-05,
|
|
"loss": 0.3879,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1401033103466034,
|
|
"step": 425,
|
|
"valid_targets_mean": 3449.6,
|
|
"valid_targets_min": 987
|
|
},
|
|
{
|
|
"epoch": 4.777777777777778,
|
|
"grad_norm": 0.37212452994075035,
|
|
"learning_rate": 1.1172865944765122e-05,
|
|
"loss": 0.3905,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12527167797088623,
|
|
"step": 430,
|
|
"valid_targets_mean": 3579.2,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 4.833333333333333,
|
|
"grad_norm": 0.31452234844700844,
|
|
"learning_rate": 1.067912960594923e-05,
|
|
"loss": 0.3922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13775086402893066,
|
|
"step": 435,
|
|
"valid_targets_mean": 4263.2,
|
|
"valid_targets_min": 600
|
|
},
|
|
{
|
|
"epoch": 4.888888888888889,
|
|
"grad_norm": 0.32313593185220624,
|
|
"learning_rate": 1.0192546499432066e-05,
|
|
"loss": 0.3845,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1394214630126953,
|
|
"step": 440,
|
|
"valid_targets_mean": 4157.8,
|
|
"valid_targets_min": 374
|
|
},
|
|
{
|
|
"epoch": 4.944444444444445,
|
|
"grad_norm": 0.31455375463568513,
|
|
"learning_rate": 9.713490049780931e-06,
|
|
"loss": 0.3871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10356146097183228,
|
|
"step": 445,
|
|
"valid_targets_mean": 3152.8,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.3424632953230929,
|
|
"learning_rate": 9.242327905286552e-06,
|
|
"loss": 0.3789,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11645328998565674,
|
|
"step": 450,
|
|
"valid_targets_mean": 3493.2,
|
|
"valid_targets_min": 378
|
|
},
|
|
{
|
|
"epoch": 5.055555555555555,
|
|
"grad_norm": 0.3147491459732294,
|
|
"learning_rate": 8.779421655814189e-06,
|
|
"loss": 0.3779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1383446902036667,
|
|
"step": 455,
|
|
"valid_targets_mean": 4309.0,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 5.111111111111111,
|
|
"grad_norm": 0.3270625971249571,
|
|
"learning_rate": 8.325126555304208e-06,
|
|
"loss": 0.3854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11969293653964996,
|
|
"step": 460,
|
|
"valid_targets_mean": 3615.5,
|
|
"valid_targets_min": 324
|
|
},
|
|
{
|
|
"epoch": 5.166666666666667,
|
|
"grad_norm": 0.505261887782307,
|
|
"learning_rate": 7.879791249135059e-06,
|
|
"loss": 0.3782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13538235425949097,
|
|
"step": 465,
|
|
"valid_targets_mean": 4006.3,
|
|
"valid_targets_min": 732
|
|
},
|
|
{
|
|
"epoch": 5.222222222222222,
|
|
"grad_norm": 0.3274039672119106,
|
|
"learning_rate": 7.443757506558033e-06,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12611891329288483,
|
|
"step": 470,
|
|
"valid_targets_mean": 3720.2,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 5.277777777777778,
|
|
"grad_norm": 0.325522620038438,
|
|
"learning_rate": 7.0173599584089625e-06,
|
|
"loss": 0.374,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12371672689914703,
|
|
"step": 475,
|
|
"valid_targets_mean": 3496.4,
|
|
"valid_targets_min": 249
|
|
},
|
|
{
|
|
"epoch": 5.333333333333333,
|
|
"grad_norm": 0.3956796011342642,
|
|
"learning_rate": 6.600925840298331e-06,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13171960413455963,
|
|
"step": 480,
|
|
"valid_targets_mean": 3330.3,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 5.388888888888889,
|
|
"grad_norm": 0.33440218053570064,
|
|
"learning_rate": 6.1947747414767035e-06,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11577549576759338,
|
|
"step": 485,
|
|
"valid_targets_mean": 3481.1,
|
|
"valid_targets_min": 343
|
|
},
|
|
{
|
|
"epoch": 5.444444444444445,
|
|
"grad_norm": 0.312449052176888,
|
|
"learning_rate": 5.799218359568395e-06,
|
|
"loss": 0.3874,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12085084617137909,
|
|
"step": 490,
|
|
"valid_targets_mean": 3568.8,
|
|
"valid_targets_min": 289
|
|
},
|
|
{
|
|
"epoch": 5.5,
|
|
"grad_norm": 0.32763223291113786,
|
|
"learning_rate": 5.414560261361415e-06,
|
|
"loss": 0.3827,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12720659375190735,
|
|
"step": 495,
|
|
"valid_targets_mean": 3656.4,
|
|
"valid_targets_min": 196
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 0.3478293772830613,
|
|
"learning_rate": 5.041095649837429e-06,
|
|
"loss": 0.382,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14937317371368408,
|
|
"step": 500,
|
|
"valid_targets_mean": 4328.1,
|
|
"valid_targets_min": 541
|
|
},
|
|
{
|
|
"epoch": 5.611111111111111,
|
|
"grad_norm": 0.3333322556427379,
|
|
"learning_rate": 4.679111137620442e-06,
|
|
"loss": 0.3855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11903315037488937,
|
|
"step": 505,
|
|
"valid_targets_mean": 3451.2,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 5.666666666666667,
|
|
"grad_norm": 0.334978529000904,
|
|
"learning_rate": 4.328884527018067e-06,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1238088607788086,
|
|
"step": 510,
|
|
"valid_targets_mean": 3320.3,
|
|
"valid_targets_min": 381
|
|
},
|
|
{
|
|
"epoch": 5.722222222222222,
|
|
"grad_norm": 0.3391629049530058,
|
|
"learning_rate": 3.990684596824219e-06,
|
|
"loss": 0.384,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11955156177282333,
|
|
"step": 515,
|
|
"valid_targets_mean": 3707.0,
|
|
"valid_targets_min": 357
|
|
},
|
|
{
|
|
"epoch": 5.777777777777778,
|
|
"grad_norm": 0.3469430832903268,
|
|
"learning_rate": 3.6647708960468696e-06,
|
|
"loss": 0.3836,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14425542950630188,
|
|
"step": 520,
|
|
"valid_targets_mean": 4292.4,
|
|
"valid_targets_min": 1258
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.31586389870343146,
|
|
"learning_rate": 3.3513935447190595e-06,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1405523270368576,
|
|
"step": 525,
|
|
"valid_targets_mean": 4005.5,
|
|
"valid_targets_min": 356
|
|
},
|
|
{
|
|
"epoch": 5.888888888888889,
|
|
"grad_norm": 0.3413641532518475,
|
|
"learning_rate": 3.050793041946183e-06,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1431114375591278,
|
|
"step": 530,
|
|
"valid_targets_mean": 3861.8,
|
|
"valid_targets_min": 298
|
|
},
|
|
{
|
|
"epoch": 5.944444444444445,
|
|
"grad_norm": 0.3640119009955889,
|
|
"learning_rate": 2.763200081336721e-06,
|
|
"loss": 0.3765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14532673358917236,
|
|
"step": 535,
|
|
"valid_targets_mean": 3385.6,
|
|
"valid_targets_min": 304
|
|
},
|
|
{
|
|
"epoch": 6.0,
|
|
"grad_norm": 0.3215369886099674,
|
|
"learning_rate": 2.488835373958185e-06,
|
|
"loss": 0.387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09596771001815796,
|
|
"step": 540,
|
|
"valid_targets_mean": 2951.0,
|
|
"valid_targets_min": 366
|
|
},
|
|
{
|
|
"epoch": 6.055555555555555,
|
|
"grad_norm": 0.32957858738873264,
|
|
"learning_rate": 2.2279094789540244e-06,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12534984946250916,
|
|
"step": 545,
|
|
"valid_targets_mean": 3643.2,
|
|
"valid_targets_min": 334
|
|
},
|
|
{
|
|
"epoch": 6.111111111111111,
|
|
"grad_norm": 0.30481921640658827,
|
|
"learning_rate": 1.9806226419516195e-06,
|
|
"loss": 0.3752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1421050727367401,
|
|
"step": 550,
|
|
"valid_targets_mean": 4784.9,
|
|
"valid_targets_min": 2109
|
|
},
|
|
{
|
|
"epoch": 6.166666666666667,
|
|
"grad_norm": 0.3261027404622833,
|
|
"learning_rate": 1.7471646413852439e-06,
|
|
"loss": 0.3751,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1255086064338684,
|
|
"step": 555,
|
|
"valid_targets_mean": 3384.8,
|
|
"valid_targets_min": 280
|
|
},
|
|
{
|
|
"epoch": 6.222222222222222,
|
|
"grad_norm": 0.3332990066131783,
|
|
"learning_rate": 1.527714642852045e-06,
|
|
"loss": 0.3771,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10999426990747452,
|
|
"step": 560,
|
|
"valid_targets_mean": 3432.9,
|
|
"valid_targets_min": 448
|
|
},
|
|
{
|
|
"epoch": 6.277777777777778,
|
|
"grad_norm": 0.31562874627264265,
|
|
"learning_rate": 1.3224410616127292e-06,
|
|
"loss": 0.3786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12806940078735352,
|
|
"step": 565,
|
|
"valid_targets_mean": 4304.7,
|
|
"valid_targets_min": 1720
|
|
},
|
|
{
|
|
"epoch": 6.333333333333333,
|
|
"grad_norm": 0.3466774183511758,
|
|
"learning_rate": 1.1315014333425455e-06,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1276746392250061,
|
|
"step": 570,
|
|
"valid_targets_mean": 3264.2,
|
|
"valid_targets_min": 1090
|
|
},
|
|
{
|
|
"epoch": 6.388888888888889,
|
|
"grad_norm": 0.33063763343455427,
|
|
"learning_rate": 9.550422932316938e-07,
|
|
"loss": 0.3707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12351889163255692,
|
|
"step": 575,
|
|
"valid_targets_mean": 4072.2,
|
|
"valid_targets_min": 345
|
|
},
|
|
{
|
|
"epoch": 6.444444444444445,
|
|
"grad_norm": 0.3371607315092282,
|
|
"learning_rate": 7.931990635280052e-07,
|
|
"loss": 0.381,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1360011100769043,
|
|
"step": 580,
|
|
"valid_targets_mean": 3644.0,
|
|
"valid_targets_min": 387
|
|
},
|
|
{
|
|
"epoch": 6.5,
|
|
"grad_norm": 0.3142943262407192,
|
|
"learning_rate": 6.460959496081276e-07,
|
|
"loss": 0.3729,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12145544588565826,
|
|
"step": 585,
|
|
"valid_targets_mean": 3623.0,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 6.555555555555555,
|
|
"grad_norm": 0.3120929356571334,
|
|
"learning_rate": 5.13845844657066e-07,
|
|
"loss": 0.3766,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12722007930278778,
|
|
"step": 590,
|
|
"valid_targets_mean": 4118.3,
|
|
"valid_targets_min": 214
|
|
},
|
|
{
|
|
"epoch": 6.611111111111111,
|
|
"grad_norm": 0.3248736657284745,
|
|
"learning_rate": 3.965502430291235e-07,
|
|
"loss": 0.38,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12026658654212952,
|
|
"step": 595,
|
|
"valid_targets_mean": 3682.3,
|
|
"valid_targets_min": 153
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.3371061664081027,
|
|
"learning_rate": 2.942991623568436e-07,
|
|
"loss": 0.3816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1275937557220459,
|
|
"step": 600,
|
|
"valid_targets_mean": 3659.6,
|
|
"valid_targets_min": 328
|
|
},
|
|
{
|
|
"epoch": 6.722222222222222,
|
|
"grad_norm": 0.3414371651160847,
|
|
"learning_rate": 2.0717107446762696e-07,
|
|
"loss": 0.3803,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11111524701118469,
|
|
"step": 605,
|
|
"valid_targets_mean": 2836.0,
|
|
"valid_targets_min": 390
|
|
},
|
|
{
|
|
"epoch": 6.777777777777778,
|
|
"grad_norm": 0.33615108170492364,
|
|
"learning_rate": 1.3523284516113955e-07,
|
|
"loss": 0.3797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14175578951835632,
|
|
"step": 610,
|
|
"valid_targets_mean": 3894.5,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 6.833333333333333,
|
|
"grad_norm": 0.3180306253416986,
|
|
"learning_rate": 7.853968289363245e-08,
|
|
"loss": 0.3783,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1266726553440094,
|
|
"step": 615,
|
|
"valid_targets_mean": 3853.8,
|
|
"valid_targets_min": 272
|
|
},
|
|
{
|
|
"epoch": 6.888888888888889,
|
|
"grad_norm": 0.34831401135169726,
|
|
"learning_rate": 3.7135096408631443e-08,
|
|
"loss": 0.3762,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11110063642263412,
|
|
"step": 620,
|
|
"valid_targets_mean": 3097.7,
|
|
"valid_targets_min": 1279
|
|
},
|
|
{
|
|
"epoch": 6.944444444444445,
|
|
"grad_norm": 0.39253455514179003,
|
|
"learning_rate": 1.1050861346488806e-08,
|
|
"loss": 0.3821,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12320859730243683,
|
|
"step": 625,
|
|
"valid_targets_mean": 3804.1,
|
|
"valid_targets_min": 505
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"grad_norm": 0.345704758959377,
|
|
"learning_rate": 3.069958583856725e-10,
|
|
"loss": 0.3799,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13302914798259735,
|
|
"step": 630,
|
|
"valid_targets_mean": 3211.4,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13302914798259735,
|
|
"step": 630,
|
|
"total_flos": 1.4527566279155384e+18,
|
|
"train_loss": 0.4386636906199985,
|
|
"train_runtime": 8968.3932,
|
|
"train_samples_per_second": 6.727,
|
|
"train_steps_per_second": 0.07,
|
|
"valid_targets_mean": 3211.4,
|
|
"valid_targets_min": 220
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 630,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.4527566279155384e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|