Model: laion/sft__Kimi-2-5-swesmith-oracle-maxeps-32k__Qwen3-8B Source: Original Platform
1555 lines
43 KiB
JSON
1555 lines
43 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 686,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.051194539249146756,
|
|
"grad_norm": 25.82480694310689,
|
|
"learning_rate": 2.3188405797101453e-06,
|
|
"loss": 0.8837,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28898581862449646,
|
|
"step": 5,
|
|
"valid_targets_mean": 4911.5,
|
|
"valid_targets_min": 2215
|
|
},
|
|
{
|
|
"epoch": 0.10238907849829351,
|
|
"grad_norm": 12.744490357644285,
|
|
"learning_rate": 5.2173913043478265e-06,
|
|
"loss": 0.7924,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2303556501865387,
|
|
"step": 10,
|
|
"valid_targets_mean": 5216.9,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 0.15358361774744028,
|
|
"grad_norm": 2.6591207927386353,
|
|
"learning_rate": 8.115942028985508e-06,
|
|
"loss": 0.629,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22648176550865173,
|
|
"step": 15,
|
|
"valid_targets_mean": 5766.3,
|
|
"valid_targets_min": 2726
|
|
},
|
|
{
|
|
"epoch": 0.20477815699658702,
|
|
"grad_norm": 1.534238831991734,
|
|
"learning_rate": 1.101449275362319e-05,
|
|
"loss": 0.5623,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1773180514574051,
|
|
"step": 20,
|
|
"valid_targets_mean": 5145.9,
|
|
"valid_targets_min": 2241
|
|
},
|
|
{
|
|
"epoch": 0.25597269624573377,
|
|
"grad_norm": 1.0352664016291477,
|
|
"learning_rate": 1.391304347826087e-05,
|
|
"loss": 0.5276,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1391734778881073,
|
|
"step": 25,
|
|
"valid_targets_mean": 4861.1,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 0.30716723549488056,
|
|
"grad_norm": 0.6935657264704905,
|
|
"learning_rate": 1.681159420289855e-05,
|
|
"loss": 0.4868,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15757042169570923,
|
|
"step": 30,
|
|
"valid_targets_mean": 5287.8,
|
|
"valid_targets_min": 1702
|
|
},
|
|
{
|
|
"epoch": 0.3583617747440273,
|
|
"grad_norm": 0.574511779794303,
|
|
"learning_rate": 1.9710144927536236e-05,
|
|
"loss": 0.4641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14636749029159546,
|
|
"step": 35,
|
|
"valid_targets_mean": 4977.4,
|
|
"valid_targets_min": 1383
|
|
},
|
|
{
|
|
"epoch": 0.40955631399317405,
|
|
"grad_norm": 0.45954923986300594,
|
|
"learning_rate": 2.2608695652173914e-05,
|
|
"loss": 0.4417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14387136697769165,
|
|
"step": 40,
|
|
"valid_targets_mean": 5467.8,
|
|
"valid_targets_min": 413
|
|
},
|
|
{
|
|
"epoch": 0.46075085324232085,
|
|
"grad_norm": 0.36407097207825967,
|
|
"learning_rate": 2.5507246376811593e-05,
|
|
"loss": 0.4245,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11947175860404968,
|
|
"step": 45,
|
|
"valid_targets_mean": 4556.4,
|
|
"valid_targets_min": 1526
|
|
},
|
|
{
|
|
"epoch": 0.5119453924914675,
|
|
"grad_norm": 0.3200847843431381,
|
|
"learning_rate": 2.840579710144928e-05,
|
|
"loss": 0.4008,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12619568407535553,
|
|
"step": 50,
|
|
"valid_targets_mean": 5079.8,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 0.5631399317406144,
|
|
"grad_norm": 0.2749784180114348,
|
|
"learning_rate": 3.130434782608696e-05,
|
|
"loss": 0.3861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15403807163238525,
|
|
"step": 55,
|
|
"valid_targets_mean": 5949.9,
|
|
"valid_targets_min": 350
|
|
},
|
|
{
|
|
"epoch": 0.6143344709897611,
|
|
"grad_norm": 0.260946688973715,
|
|
"learning_rate": 3.420289855072464e-05,
|
|
"loss": 0.3798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10824601352214813,
|
|
"step": 60,
|
|
"valid_targets_mean": 4519.8,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 0.6655290102389079,
|
|
"grad_norm": 0.2695889942073195,
|
|
"learning_rate": 3.7101449275362325e-05,
|
|
"loss": 0.3636,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14311721920967102,
|
|
"step": 65,
|
|
"valid_targets_mean": 5901.2,
|
|
"valid_targets_min": 2696
|
|
},
|
|
{
|
|
"epoch": 0.7167235494880546,
|
|
"grad_norm": 0.2513368707958076,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.113977812230587,
|
|
"step": 70,
|
|
"valid_targets_mean": 5193.1,
|
|
"valid_targets_min": 937
|
|
},
|
|
{
|
|
"epoch": 0.7679180887372014,
|
|
"grad_norm": 0.25669844186315777,
|
|
"learning_rate": 3.999351894109228e-05,
|
|
"loss": 0.3337,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10874387621879578,
|
|
"step": 75,
|
|
"valid_targets_mean": 5100.9,
|
|
"valid_targets_min": 1998
|
|
},
|
|
{
|
|
"epoch": 0.8191126279863481,
|
|
"grad_norm": 0.2654328857905388,
|
|
"learning_rate": 3.997407996478158e-05,
|
|
"loss": 0.3345,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11455205827951431,
|
|
"step": 80,
|
|
"valid_targets_mean": 5224.6,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 0.8703071672354948,
|
|
"grad_norm": 0.2593782401899841,
|
|
"learning_rate": 3.9941695669582944e-05,
|
|
"loss": 0.3304,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10599816590547562,
|
|
"step": 85,
|
|
"valid_targets_mean": 4968.2,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 0.9215017064846417,
|
|
"grad_norm": 0.23686889348670484,
|
|
"learning_rate": 3.989638704394887e-05,
|
|
"loss": 0.3168,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11794209480285645,
|
|
"step": 90,
|
|
"valid_targets_mean": 5557.9,
|
|
"valid_targets_min": 1855
|
|
},
|
|
{
|
|
"epoch": 0.9726962457337884,
|
|
"grad_norm": 0.27218880367654336,
|
|
"learning_rate": 3.983818345266653e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09047803282737732,
|
|
"step": 95,
|
|
"valid_targets_mean": 4906.0,
|
|
"valid_targets_min": 2087
|
|
},
|
|
{
|
|
"epoch": 1.0204778156996588,
|
|
"grad_norm": 0.2548103228534002,
|
|
"learning_rate": 3.976712261782631e-05,
|
|
"loss": 0.3204,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10694491863250732,
|
|
"step": 100,
|
|
"valid_targets_mean": 4863.1,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 1.0716723549488054,
|
|
"grad_norm": 0.26271765818455856,
|
|
"learning_rate": 3.968325059437385e-05,
|
|
"loss": 0.3154,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11918433755636215,
|
|
"step": 105,
|
|
"valid_targets_mean": 5599.3,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 1.1228668941979523,
|
|
"grad_norm": 0.27930934721029477,
|
|
"learning_rate": 3.958662174026164e-05,
|
|
"loss": 0.3121,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09404879063367844,
|
|
"step": 110,
|
|
"valid_targets_mean": 5062.9,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 1.174061433447099,
|
|
"grad_norm": 0.26321540953232964,
|
|
"learning_rate": 3.947729868121924e-05,
|
|
"loss": 0.3053,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10871408879756927,
|
|
"step": 115,
|
|
"valid_targets_mean": 5306.3,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 1.2252559726962458,
|
|
"grad_norm": 0.26402045132441954,
|
|
"learning_rate": 3.935535227016521e-05,
|
|
"loss": 0.3076,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834833025932312,
|
|
"step": 120,
|
|
"valid_targets_mean": 5735.2,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 1.2764505119453924,
|
|
"grad_norm": 0.26407368854218555,
|
|
"learning_rate": 3.922086154128693e-05,
|
|
"loss": 0.2968,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10184627771377563,
|
|
"step": 125,
|
|
"valid_targets_mean": 5052.8,
|
|
"valid_targets_min": 2201
|
|
},
|
|
{
|
|
"epoch": 1.3276450511945392,
|
|
"grad_norm": 0.26565677723672504,
|
|
"learning_rate": 3.907391365881802e-05,
|
|
"loss": 0.3075,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08913706988096237,
|
|
"step": 130,
|
|
"valid_targets_mean": 4881.6,
|
|
"valid_targets_min": 2122
|
|
},
|
|
{
|
|
"epoch": 1.378839590443686,
|
|
"grad_norm": 0.2831307137649524,
|
|
"learning_rate": 3.891460386054675e-05,
|
|
"loss": 0.299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09639839828014374,
|
|
"step": 135,
|
|
"valid_targets_mean": 4887.5,
|
|
"valid_targets_min": 2153
|
|
},
|
|
{
|
|
"epoch": 1.4300341296928327,
|
|
"grad_norm": 0.27646586353677727,
|
|
"learning_rate": 3.8743035396091845e-05,
|
|
"loss": 0.2998,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11154685169458389,
|
|
"step": 140,
|
|
"valid_targets_mean": 5716.4,
|
|
"valid_targets_min": 2925
|
|
},
|
|
{
|
|
"epoch": 1.4812286689419796,
|
|
"grad_norm": 0.27431775252756874,
|
|
"learning_rate": 3.8559319459985776e-05,
|
|
"loss": 0.2969,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08550999313592911,
|
|
"step": 145,
|
|
"valid_targets_mean": 4336.7,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 1.5324232081911262,
|
|
"grad_norm": 0.25781067169583155,
|
|
"learning_rate": 3.836357511960898e-05,
|
|
"loss": 0.2946,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1011088490486145,
|
|
"step": 150,
|
|
"valid_targets_mean": 5483.2,
|
|
"valid_targets_min": 2251
|
|
},
|
|
{
|
|
"epoch": 1.583617747440273,
|
|
"grad_norm": 0.27911127666111935,
|
|
"learning_rate": 3.815592923802152e-05,
|
|
"loss": 0.2922,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10180200636386871,
|
|
"step": 155,
|
|
"valid_targets_mean": 5241.7,
|
|
"valid_targets_min": 1968
|
|
},
|
|
{
|
|
"epoch": 1.63481228668942,
|
|
"grad_norm": 0.2743316944893025,
|
|
"learning_rate": 3.793651639174246e-05,
|
|
"loss": 0.2881,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09541475772857666,
|
|
"step": 160,
|
|
"valid_targets_mean": 5239.0,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 1.6860068259385665,
|
|
"grad_norm": 0.2943897714245753,
|
|
"learning_rate": 3.7705478783529986e-05,
|
|
"loss": 0.2971,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08847599476575851,
|
|
"step": 165,
|
|
"valid_targets_mean": 4835.5,
|
|
"valid_targets_min": 2225
|
|
},
|
|
{
|
|
"epoch": 1.7372013651877132,
|
|
"grad_norm": 0.261601602924854,
|
|
"learning_rate": 3.746296615021896e-05,
|
|
"loss": 0.296,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09461049735546112,
|
|
"step": 170,
|
|
"valid_targets_mean": 4939.5,
|
|
"valid_targets_min": 2062
|
|
},
|
|
{
|
|
"epoch": 1.78839590443686,
|
|
"grad_norm": 0.27191268600437934,
|
|
"learning_rate": 3.720913566567562e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09755190461874008,
|
|
"step": 175,
|
|
"valid_targets_mean": 5106.4,
|
|
"valid_targets_min": 2367
|
|
},
|
|
{
|
|
"epoch": 1.8395904436860069,
|
|
"grad_norm": 0.24464624444284053,
|
|
"learning_rate": 3.6944151838932274e-05,
|
|
"loss": 0.2935,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.110307976603508,
|
|
"step": 180,
|
|
"valid_targets_mean": 6091.2,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 1.8907849829351537,
|
|
"grad_norm": 0.2602335537625986,
|
|
"learning_rate": 3.666818640756797e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09931231290102005,
|
|
"step": 185,
|
|
"valid_targets_mean": 5034.7,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 1.9419795221843004,
|
|
"grad_norm": 0.25671393831155714,
|
|
"learning_rate": 3.638141822640444e-05,
|
|
"loss": 0.2908,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0843583270907402,
|
|
"step": 190,
|
|
"valid_targets_mean": 4742.2,
|
|
"valid_targets_min": 2233
|
|
},
|
|
{
|
|
"epoch": 1.993174061433447,
|
|
"grad_norm": 0.23767750953863967,
|
|
"learning_rate": 3.608403315158917e-05,
|
|
"loss": 0.2915,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08634407818317413,
|
|
"step": 195,
|
|
"valid_targets_mean": 4896.8,
|
|
"valid_targets_min": 329
|
|
},
|
|
{
|
|
"epoch": 2.0409556313993176,
|
|
"grad_norm": 0.2549526896790837,
|
|
"learning_rate": 3.5776223920140985e-05,
|
|
"loss": 0.2833,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11179839074611664,
|
|
"step": 200,
|
|
"valid_targets_mean": 6050.2,
|
|
"valid_targets_min": 2254
|
|
},
|
|
{
|
|
"epoch": 2.092150170648464,
|
|
"grad_norm": 0.2738878896819206,
|
|
"learning_rate": 3.545819002503602e-05,
|
|
"loss": 0.284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09601035714149475,
|
|
"step": 205,
|
|
"valid_targets_mean": 5224.0,
|
|
"valid_targets_min": 1694
|
|
},
|
|
{
|
|
"epoch": 2.143344709897611,
|
|
"grad_norm": 0.2780264276944305,
|
|
"learning_rate": 3.513013758591515e-05,
|
|
"loss": 0.2815,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0888153463602066,
|
|
"step": 210,
|
|
"valid_targets_mean": 4907.5,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 2.1945392491467577,
|
|
"grad_norm": 0.26333842271632707,
|
|
"learning_rate": 3.479227921549666e-05,
|
|
"loss": 0.281,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09323578327894211,
|
|
"step": 215,
|
|
"valid_targets_mean": 5229.7,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 2.2457337883959045,
|
|
"grad_norm": 0.2934385105416155,
|
|
"learning_rate": 3.444483388178066e-05,
|
|
"loss": 0.27,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09266763925552368,
|
|
"step": 220,
|
|
"valid_targets_mean": 5267.0,
|
|
"valid_targets_min": 410
|
|
},
|
|
{
|
|
"epoch": 2.296928327645051,
|
|
"grad_norm": 0.25567549374046494,
|
|
"learning_rate": 3.4088026766134654e-05,
|
|
"loss": 0.2782,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08348363637924194,
|
|
"step": 225,
|
|
"valid_targets_mean": 5137.3,
|
|
"valid_targets_min": 2069
|
|
},
|
|
{
|
|
"epoch": 2.348122866894198,
|
|
"grad_norm": 0.3022766271875021,
|
|
"learning_rate": 3.372208911735216e-05,
|
|
"loss": 0.2856,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08997620642185211,
|
|
"step": 230,
|
|
"valid_targets_mean": 5248.5,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 2.3993174061433447,
|
|
"grad_norm": 0.2534445787104033,
|
|
"learning_rate": 3.3347258101779015e-05,
|
|
"loss": 0.282,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11496127396821976,
|
|
"step": 235,
|
|
"valid_targets_mean": 5611.1,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 2.4505119453924915,
|
|
"grad_norm": 0.2520452606948065,
|
|
"learning_rate": 3.296377664960445e-05,
|
|
"loss": 0.2708,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09025128185749054,
|
|
"step": 240,
|
|
"valid_targets_mean": 5019.2,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 2.5017064846416384,
|
|
"grad_norm": 0.2627583946055227,
|
|
"learning_rate": 3.257189329741662e-05,
|
|
"loss": 0.2723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08457937836647034,
|
|
"step": 245,
|
|
"valid_targets_mean": 4861.2,
|
|
"valid_targets_min": 1761
|
|
},
|
|
{
|
|
"epoch": 2.5529010238907848,
|
|
"grad_norm": 0.2561709090419887,
|
|
"learning_rate": 3.217186202712458e-05,
|
|
"loss": 0.274,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0853310376405716,
|
|
"step": 250,
|
|
"valid_targets_mean": 4727.4,
|
|
"valid_targets_min": 1792
|
|
},
|
|
{
|
|
"epoch": 2.6040955631399316,
|
|
"grad_norm": 0.44538128860438136,
|
|
"learning_rate": 3.1763942101351095e-05,
|
|
"loss": 0.2773,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09366409480571747,
|
|
"step": 255,
|
|
"valid_targets_mean": 5007.0,
|
|
"valid_targets_min": 365
|
|
},
|
|
{
|
|
"epoch": 2.6552901023890785,
|
|
"grad_norm": 0.2852975254240757,
|
|
"learning_rate": 3.134839789540302e-05,
|
|
"loss": 0.2752,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0924173966050148,
|
|
"step": 260,
|
|
"valid_targets_mean": 5460.8,
|
|
"valid_targets_min": 433
|
|
},
|
|
{
|
|
"epoch": 2.7064846416382253,
|
|
"grad_norm": 0.26038175748374537,
|
|
"learning_rate": 3.0925498725928115e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0912502110004425,
|
|
"step": 265,
|
|
"valid_targets_mean": 5070.3,
|
|
"valid_targets_min": 2242
|
|
},
|
|
{
|
|
"epoch": 2.757679180887372,
|
|
"grad_norm": 0.2663014460071229,
|
|
"learning_rate": 3.0495518676369306e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09056515991687775,
|
|
"step": 270,
|
|
"valid_targets_mean": 5028.2,
|
|
"valid_targets_min": 1937
|
|
},
|
|
{
|
|
"epoch": 2.8088737201365186,
|
|
"grad_norm": 0.3060566751344927,
|
|
"learning_rate": 3.0058736419329643e-05,
|
|
"loss": 0.2693,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08921462297439575,
|
|
"step": 275,
|
|
"valid_targets_mean": 4812.8,
|
|
"valid_targets_min": 314
|
|
},
|
|
{
|
|
"epoch": 2.8600682593856654,
|
|
"grad_norm": 0.24797757600989206,
|
|
"learning_rate": 2.9615435035962878e-05,
|
|
"loss": 0.2774,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09497439861297607,
|
|
"step": 280,
|
|
"valid_targets_mean": 5516.0,
|
|
"valid_targets_min": 2022
|
|
},
|
|
{
|
|
"epoch": 2.9112627986348123,
|
|
"grad_norm": 0.3147509660798771,
|
|
"learning_rate": 2.9165901832506977e-05,
|
|
"loss": 0.2714,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09763729572296143,
|
|
"step": 285,
|
|
"valid_targets_mean": 4715.4,
|
|
"valid_targets_min": 284
|
|
},
|
|
{
|
|
"epoch": 2.962457337883959,
|
|
"grad_norm": 0.26780692138537443,
|
|
"learning_rate": 2.8710428154079185e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08496683835983276,
|
|
"step": 290,
|
|
"valid_targets_mean": 4745.5,
|
|
"valid_targets_min": 1235
|
|
},
|
|
{
|
|
"epoch": 3.0102389078498293,
|
|
"grad_norm": 0.26968597810630485,
|
|
"learning_rate": 2.824930919585359e-05,
|
|
"loss": 0.2695,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08243635296821594,
|
|
"step": 295,
|
|
"valid_targets_mean": 5450.9,
|
|
"valid_targets_min": 317
|
|
},
|
|
{
|
|
"epoch": 3.061433447098976,
|
|
"grad_norm": 0.31124671324275366,
|
|
"learning_rate": 2.778284381174336e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08364085853099823,
|
|
"step": 300,
|
|
"valid_targets_mean": 5530.3,
|
|
"valid_targets_min": 1478
|
|
},
|
|
{
|
|
"epoch": 3.112627986348123,
|
|
"grad_norm": 0.30315704623816053,
|
|
"learning_rate": 2.7311334320711784e-05,
|
|
"loss": 0.2627,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09039817005395889,
|
|
"step": 305,
|
|
"valid_targets_mean": 5173.2,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 3.1638225255972694,
|
|
"grad_norm": 0.2822760635459168,
|
|
"learning_rate": 2.683508631083755e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07820077240467072,
|
|
"step": 310,
|
|
"valid_targets_mean": 4802.0,
|
|
"valid_targets_min": 2196
|
|
},
|
|
{
|
|
"epoch": 3.2150170648464163,
|
|
"grad_norm": 0.29743866886806536,
|
|
"learning_rate": 2.6354408441261324e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09245477616786957,
|
|
"step": 315,
|
|
"valid_targets_mean": 5530.5,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 3.266211604095563,
|
|
"grad_norm": 0.27606973763989207,
|
|
"learning_rate": 2.5869612242141946e-05,
|
|
"loss": 0.2664,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08491060137748718,
|
|
"step": 320,
|
|
"valid_targets_mean": 5039.2,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 3.31740614334471,
|
|
"grad_norm": 0.2502805038424136,
|
|
"learning_rate": 2.538101191275189e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09891136735677719,
|
|
"step": 325,
|
|
"valid_targets_mean": 5541.6,
|
|
"valid_targets_min": 315
|
|
},
|
|
{
|
|
"epoch": 3.368600682593857,
|
|
"grad_norm": 0.2611678416517467,
|
|
"learning_rate": 2.488892411784286e-05,
|
|
"loss": 0.2601,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08309619128704071,
|
|
"step": 330,
|
|
"valid_targets_mean": 4493.0,
|
|
"valid_targets_min": 2063
|
|
},
|
|
{
|
|
"epoch": 3.419795221843003,
|
|
"grad_norm": 0.2611914869636264,
|
|
"learning_rate": 2.439366778241352e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09184084832668304,
|
|
"step": 335,
|
|
"valid_targets_mean": 4901.5,
|
|
"valid_targets_min": 430
|
|
},
|
|
{
|
|
"epoch": 3.47098976109215,
|
|
"grad_norm": 0.27246810160180385,
|
|
"learning_rate": 2.3895563885012303e-05,
|
|
"loss": 0.2604,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08379268646240234,
|
|
"step": 340,
|
|
"valid_targets_mean": 4821.3,
|
|
"valid_targets_min": 2467
|
|
},
|
|
{
|
|
"epoch": 3.522184300341297,
|
|
"grad_norm": 0.2536800115672537,
|
|
"learning_rate": 2.3394935249709332e-05,
|
|
"loss": 0.2641,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08427208662033081,
|
|
"step": 345,
|
|
"valid_targets_mean": 4990.8,
|
|
"valid_targets_min": 2231
|
|
},
|
|
{
|
|
"epoch": 3.573378839590444,
|
|
"grad_norm": 0.258064897446696,
|
|
"learning_rate": 2.2892106336872234e-05,
|
|
"loss": 0.2659,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08550337702035904,
|
|
"step": 350,
|
|
"valid_targets_mean": 5197.7,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 3.6245733788395906,
|
|
"grad_norm": 0.2495721887544657,
|
|
"learning_rate": 2.2387403032881467e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08749514818191528,
|
|
"step": 355,
|
|
"valid_targets_mean": 4881.7,
|
|
"valid_targets_min": 2212
|
|
},
|
|
{
|
|
"epoch": 3.675767918088737,
|
|
"grad_norm": 0.2468445540270157,
|
|
"learning_rate": 2.1881152438921447e-05,
|
|
"loss": 0.2624,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08829236775636673,
|
|
"step": 360,
|
|
"valid_targets_mean": 5321.8,
|
|
"valid_targets_min": 1695
|
|
},
|
|
{
|
|
"epoch": 3.726962457337884,
|
|
"grad_norm": 0.2550167362316407,
|
|
"learning_rate": 2.1373682658984317e-05,
|
|
"loss": 0.2639,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0848623663187027,
|
|
"step": 365,
|
|
"valid_targets_mean": 4613.7,
|
|
"valid_targets_min": 2456
|
|
},
|
|
{
|
|
"epoch": 3.7781569965870307,
|
|
"grad_norm": 0.2310804725665606,
|
|
"learning_rate": 2.0865322587223855e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08814995735883713,
|
|
"step": 370,
|
|
"valid_targets_mean": 5460.0,
|
|
"valid_targets_min": 1805
|
|
},
|
|
{
|
|
"epoch": 3.8293515358361776,
|
|
"grad_norm": 0.23090177072633178,
|
|
"learning_rate": 2.035640169479719e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.06490929424762726,
|
|
"step": 375,
|
|
"valid_targets_mean": 4068.9,
|
|
"valid_targets_min": 1709
|
|
},
|
|
{
|
|
"epoch": 3.8805460750853245,
|
|
"grad_norm": 0.23838704123836477,
|
|
"learning_rate": 1.9847249816332644e-05,
|
|
"loss": 0.2679,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08089349418878555,
|
|
"step": 380,
|
|
"valid_targets_mean": 4778.1,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 3.931740614334471,
|
|
"grad_norm": 0.260605487192663,
|
|
"learning_rate": 1.933819693616195e-05,
|
|
"loss": 0.2529,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08289642632007599,
|
|
"step": 385,
|
|
"valid_targets_mean": 4828.9,
|
|
"valid_targets_min": 2450
|
|
},
|
|
{
|
|
"epoch": 3.9829351535836177,
|
|
"grad_norm": 0.26648319115456115,
|
|
"learning_rate": 1.8829572974455465e-05,
|
|
"loss": 0.2689,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124649107456207,
|
|
"step": 390,
|
|
"valid_targets_mean": 5940.3,
|
|
"valid_targets_min": 2112
|
|
},
|
|
{
|
|
"epoch": 4.030716723549488,
|
|
"grad_norm": 0.25074836696819963,
|
|
"learning_rate": 1.832170757339895e-05,
|
|
"loss": 0.2607,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09088584035634995,
|
|
"step": 395,
|
|
"valid_targets_mean": 5790.5,
|
|
"valid_targets_min": 1996
|
|
},
|
|
{
|
|
"epoch": 4.081911262798635,
|
|
"grad_norm": 0.23932382754020326,
|
|
"learning_rate": 1.781492988355056e-05,
|
|
"loss": 0.2591,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08872021734714508,
|
|
"step": 400,
|
|
"valid_targets_mean": 5202.1,
|
|
"valid_targets_min": 443
|
|
},
|
|
{
|
|
"epoch": 4.1331058020477816,
|
|
"grad_norm": 0.24243405252900668,
|
|
"learning_rate": 1.7309568350516376e-05,
|
|
"loss": 0.254,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07671861350536346,
|
|
"step": 405,
|
|
"valid_targets_mean": 5088.6,
|
|
"valid_targets_min": 2001
|
|
},
|
|
{
|
|
"epoch": 4.184300341296928,
|
|
"grad_norm": 0.24082013214852604,
|
|
"learning_rate": 1.680595050208296e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08671650290489197,
|
|
"step": 410,
|
|
"valid_targets_mean": 5681.1,
|
|
"valid_targets_min": 2280
|
|
},
|
|
{
|
|
"epoch": 4.235494880546075,
|
|
"grad_norm": 0.2705939041983527,
|
|
"learning_rate": 1.630440273594455e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08631773293018341,
|
|
"step": 415,
|
|
"valid_targets_mean": 4975.5,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 4.286689419795222,
|
|
"grad_norm": 0.2647801079481516,
|
|
"learning_rate": 1.5805250108162898e-05,
|
|
"loss": 0.2493,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08514288067817688,
|
|
"step": 420,
|
|
"valid_targets_mean": 5125.4,
|
|
"valid_targets_min": 2016
|
|
},
|
|
{
|
|
"epoch": 4.337883959044369,
|
|
"grad_norm": 0.24156132010147113,
|
|
"learning_rate": 1.530881612249646e-05,
|
|
"loss": 0.2581,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0757707729935646,
|
|
"step": 425,
|
|
"valid_targets_mean": 5020.7,
|
|
"valid_targets_min": 2842
|
|
},
|
|
{
|
|
"epoch": 4.389078498293515,
|
|
"grad_norm": 0.24248555906848412,
|
|
"learning_rate": 1.4815422520735735e-05,
|
|
"loss": 0.2536,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07050696760416031,
|
|
"step": 430,
|
|
"valid_targets_mean": 4395.3,
|
|
"valid_targets_min": 2210
|
|
},
|
|
{
|
|
"epoch": 4.440273037542662,
|
|
"grad_norm": 0.24171942435675195,
|
|
"learning_rate": 1.432538907418047e-05,
|
|
"loss": 0.2548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08460260927677155,
|
|
"step": 435,
|
|
"valid_targets_mean": 5636.2,
|
|
"valid_targets_min": 2623
|
|
},
|
|
{
|
|
"epoch": 4.491467576791809,
|
|
"grad_norm": 0.31663158719733203,
|
|
"learning_rate": 1.3839033376394082e-05,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08407416194677353,
|
|
"step": 440,
|
|
"valid_targets_mean": 4772.5,
|
|
"valid_targets_min": 134
|
|
},
|
|
{
|
|
"epoch": 4.5426621160409555,
|
|
"grad_norm": 0.23417315461999244,
|
|
"learning_rate": 1.33566706373693e-05,
|
|
"loss": 0.2599,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08053170144557953,
|
|
"step": 445,
|
|
"valid_targets_mean": 5175.6,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 4.593856655290102,
|
|
"grad_norm": 0.24575926696014141,
|
|
"learning_rate": 1.2878613479238774e-05,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08464860171079636,
|
|
"step": 450,
|
|
"valid_targets_mean": 5122.8,
|
|
"valid_targets_min": 1882
|
|
},
|
|
{
|
|
"epoch": 4.645051194539249,
|
|
"grad_norm": 0.23884179028036426,
|
|
"learning_rate": 1.2405171733662822e-05,
|
|
"loss": 0.2519,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07469707727432251,
|
|
"step": 455,
|
|
"valid_targets_mean": 4802.0,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 4.696245733788396,
|
|
"grad_norm": 0.2622151398636782,
|
|
"learning_rate": 1.1936652241025679e-05,
|
|
"loss": 0.2499,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07871793210506439,
|
|
"step": 460,
|
|
"valid_targets_mean": 5118.5,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 4.747440273037543,
|
|
"grad_norm": 0.24354224502364916,
|
|
"learning_rate": 1.1473358651570479e-05,
|
|
"loss": 0.2589,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08825745433568954,
|
|
"step": 465,
|
|
"valid_targets_mean": 5207.9,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 4.798634812286689,
|
|
"grad_norm": 0.26717838407309835,
|
|
"learning_rate": 1.1015591228601692e-05,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09401947259902954,
|
|
"step": 470,
|
|
"valid_targets_mean": 5151.4,
|
|
"valid_targets_min": 2040
|
|
},
|
|
{
|
|
"epoch": 4.849829351535837,
|
|
"grad_norm": 0.6288828196111563,
|
|
"learning_rate": 1.0563646653882755e-05,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08458419144153595,
|
|
"step": 475,
|
|
"valid_targets_mean": 5222.2,
|
|
"valid_targets_min": 2375
|
|
},
|
|
{
|
|
"epoch": 4.901023890784983,
|
|
"grad_norm": 0.2388380799823632,
|
|
"learning_rate": 1.0117817835354851e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10258987545967102,
|
|
"step": 480,
|
|
"valid_targets_mean": 5773.7,
|
|
"valid_targets_min": 273
|
|
},
|
|
{
|
|
"epoch": 4.952218430034129,
|
|
"grad_norm": 0.23524178853837582,
|
|
"learning_rate": 9.678393717301526e-06,
|
|
"loss": 0.2609,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08160092681646347,
|
|
"step": 485,
|
|
"valid_targets_mean": 4906.0,
|
|
"valid_targets_min": 2117
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.2815721527054231,
|
|
"learning_rate": 9.245659093082243e-06,
|
|
"loss": 0.2541,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11172492057085037,
|
|
"step": 490,
|
|
"valid_targets_mean": 4392.0,
|
|
"valid_targets_min": 519
|
|
},
|
|
{
|
|
"epoch": 5.051194539249146,
|
|
"grad_norm": 0.23545881110678396,
|
|
"learning_rate": 8.819894420556112e-06,
|
|
"loss": 0.246,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09946560859680176,
|
|
"step": 495,
|
|
"valid_targets_mean": 5775.0,
|
|
"valid_targets_min": 1809
|
|
},
|
|
{
|
|
"epoch": 5.102389078498294,
|
|
"grad_norm": 0.2332984410262101,
|
|
"learning_rate": 8.40137564031547e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08911414444446564,
|
|
"step": 500,
|
|
"valid_targets_mean": 4858.6,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 5.15358361774744,
|
|
"grad_norm": 0.2406516993513017,
|
|
"learning_rate": 7.990373996847194e-06,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0798487663269043,
|
|
"step": 505,
|
|
"valid_targets_mean": 5106.6,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 5.204778156996587,
|
|
"grad_norm": 0.25632115440474484,
|
|
"learning_rate": 7.5871558627375295e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08686481416225433,
|
|
"step": 510,
|
|
"valid_targets_mean": 4846.7,
|
|
"valid_targets_min": 492
|
|
},
|
|
{
|
|
"epoch": 5.255972696245734,
|
|
"grad_norm": 0.23873965803262287,
|
|
"learning_rate": 7.1919825660344696e-06,
|
|
"loss": 0.255,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08521182835102081,
|
|
"step": 515,
|
|
"valid_targets_mean": 5219.3,
|
|
"valid_targets_min": 1881
|
|
},
|
|
{
|
|
"epoch": 5.30716723549488,
|
|
"grad_norm": 0.23815246828270364,
|
|
"learning_rate": 6.805110220879459e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07839064300060272,
|
|
"step": 520,
|
|
"valid_targets_mean": 4840.9,
|
|
"valid_targets_min": 2536
|
|
},
|
|
{
|
|
"epoch": 5.3583617747440275,
|
|
"grad_norm": 0.2975224705505978,
|
|
"learning_rate": 6.4267895615183915e-06,
|
|
"loss": 0.2547,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08646044880151749,
|
|
"step": 525,
|
|
"valid_targets_mean": 4866.6,
|
|
"valid_targets_min": 497
|
|
},
|
|
{
|
|
"epoch": 5.409556313993174,
|
|
"grad_norm": 0.23640902606037387,
|
|
"learning_rate": 6.057265779799193e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10723152756690979,
|
|
"step": 530,
|
|
"valid_targets_mean": 5736.0,
|
|
"valid_targets_min": 299
|
|
},
|
|
{
|
|
"epoch": 5.460750853242321,
|
|
"grad_norm": 0.2307913484720284,
|
|
"learning_rate": 5.696778366261575e-06,
|
|
"loss": 0.2459,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08867950737476349,
|
|
"step": 535,
|
|
"valid_targets_mean": 5571.2,
|
|
"valid_targets_min": 1468
|
|
},
|
|
{
|
|
"epoch": 5.511945392491468,
|
|
"grad_norm": 0.23315915412363156,
|
|
"learning_rate": 5.345560954921802e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08251780271530151,
|
|
"step": 540,
|
|
"valid_targets_mean": 5138.9,
|
|
"valid_targets_min": 428
|
|
},
|
|
{
|
|
"epoch": 5.563139931740614,
|
|
"grad_norm": 0.22322887600839503,
|
|
"learning_rate": 5.00384117185311e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09144619107246399,
|
|
"step": 545,
|
|
"valid_targets_mean": 5855.6,
|
|
"valid_targets_min": 1850
|
|
},
|
|
{
|
|
"epoch": 5.614334470989761,
|
|
"grad_norm": 0.24296113504336472,
|
|
"learning_rate": 4.671840487659882e-06,
|
|
"loss": 0.2509,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09224293380975723,
|
|
"step": 550,
|
|
"valid_targets_mean": 6035.7,
|
|
"valid_targets_min": 2794
|
|
},
|
|
{
|
|
"epoch": 5.665529010238908,
|
|
"grad_norm": 0.2501891462512511,
|
|
"learning_rate": 4.3497740739413015e-06,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07294757664203644,
|
|
"step": 555,
|
|
"valid_targets_mean": 4596.5,
|
|
"valid_targets_min": 453
|
|
},
|
|
{
|
|
"epoch": 5.716723549488055,
|
|
"grad_norm": 0.23660123212684395,
|
|
"learning_rate": 4.037850663837315e-06,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07551420480012894,
|
|
"step": 560,
|
|
"valid_targets_mean": 4695.0,
|
|
"valid_targets_min": 1834
|
|
},
|
|
{
|
|
"epoch": 5.7679180887372015,
|
|
"grad_norm": 0.2298824544771145,
|
|
"learning_rate": 3.7362724167474774e-06,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0779377892613411,
|
|
"step": 565,
|
|
"valid_targets_mean": 4954.0,
|
|
"valid_targets_min": 2135
|
|
},
|
|
{
|
|
"epoch": 5.819112627986348,
|
|
"grad_norm": 0.21382794578335707,
|
|
"learning_rate": 3.4452347873102565e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07789836078882217,
|
|
"step": 570,
|
|
"valid_targets_mean": 5438.0,
|
|
"valid_targets_min": 2658
|
|
},
|
|
{
|
|
"epoch": 5.870307167235495,
|
|
"grad_norm": 0.21625982549299644,
|
|
"learning_rate": 3.1649263987277303e-06,
|
|
"loss": 0.247,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0796944797039032,
|
|
"step": 575,
|
|
"valid_targets_mean": 5126.0,
|
|
"valid_targets_min": 1466
|
|
},
|
|
{
|
|
"epoch": 5.921501706484642,
|
|
"grad_norm": 0.21966123329170228,
|
|
"learning_rate": 2.8955289205177696e-06,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08940281718969345,
|
|
"step": 580,
|
|
"valid_targets_mean": 5365.1,
|
|
"valid_targets_min": 542
|
|
},
|
|
{
|
|
"epoch": 5.972696245733788,
|
|
"grad_norm": 0.23264769437080063,
|
|
"learning_rate": 2.6372169507729627e-06,
|
|
"loss": 0.2588,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08277872949838638,
|
|
"step": 585,
|
|
"valid_targets_mean": 5623.2,
|
|
"valid_targets_min": 2509
|
|
},
|
|
{
|
|
"epoch": 6.020477815699659,
|
|
"grad_norm": 0.23969753992511295,
|
|
"learning_rate": 2.3901579030025566e-06,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0817054808139801,
|
|
"step": 590,
|
|
"valid_targets_mean": 5169.0,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 6.071672354948806,
|
|
"grad_norm": 0.235094543609173,
|
|
"learning_rate": 2.15451189763078e-06,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09182994067668915,
|
|
"step": 595,
|
|
"valid_targets_mean": 5807.4,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 6.122866894197952,
|
|
"grad_norm": 0.2427230093710648,
|
|
"learning_rate": 1.930431658221854e-06,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08008411526679993,
|
|
"step": 600,
|
|
"valid_targets_mean": 4879.1,
|
|
"valid_targets_min": 446
|
|
},
|
|
{
|
|
"epoch": 6.174061433447099,
|
|
"grad_norm": 0.23142147399386198,
|
|
"learning_rate": 1.7180624124989398e-06,
|
|
"loss": 0.2543,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0793958306312561,
|
|
"step": 605,
|
|
"valid_targets_mean": 5092.9,
|
|
"valid_targets_min": 2197
|
|
},
|
|
{
|
|
"epoch": 6.225255972696246,
|
|
"grad_norm": 0.2280686494846546,
|
|
"learning_rate": 1.5175417982212138e-06,
|
|
"loss": 0.2475,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07384900748729706,
|
|
"step": 610,
|
|
"valid_targets_mean": 4928.0,
|
|
"valid_targets_min": 1860
|
|
},
|
|
{
|
|
"epoch": 6.276450511945392,
|
|
"grad_norm": 0.24929297314875484,
|
|
"learning_rate": 1.3289997739800108e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0833367109298706,
|
|
"step": 615,
|
|
"valid_targets_mean": 5088.8,
|
|
"valid_targets_min": 2764
|
|
},
|
|
{
|
|
"epoch": 6.327645051194539,
|
|
"grad_norm": 0.22000964241336962,
|
|
"learning_rate": 1.1525585349718948e-06,
|
|
"loss": 0.2465,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07332947105169296,
|
|
"step": 620,
|
|
"valid_targets_mean": 4677.8,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 6.378839590443686,
|
|
"grad_norm": 0.21275459726601303,
|
|
"learning_rate": 9.883324338032474e-07,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07395243644714355,
|
|
"step": 625,
|
|
"valid_targets_mean": 5172.9,
|
|
"valid_targets_min": 2763
|
|
},
|
|
{
|
|
"epoch": 6.4300341296928325,
|
|
"grad_norm": 0.22175862203200505,
|
|
"learning_rate": 8.364279063776526e-07,
|
|
"loss": 0.2458,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0932476595044136,
|
|
"step": 630,
|
|
"valid_targets_mean": 5806.0,
|
|
"valid_targets_min": 2569
|
|
},
|
|
{
|
|
"epoch": 6.48122866894198,
|
|
"grad_norm": 0.2230388965834993,
|
|
"learning_rate": 6.969434029141676e-07,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07441005110740662,
|
|
"step": 635,
|
|
"valid_targets_mean": 4756.5,
|
|
"valid_targets_min": 2402
|
|
},
|
|
{
|
|
"epoch": 6.532423208191126,
|
|
"grad_norm": 0.349252390093201,
|
|
"learning_rate": 5.699693241411619e-07,
|
|
"loss": 0.248,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09055956453084946,
|
|
"step": 640,
|
|
"valid_targets_mean": 5269.7,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 6.5836177474402735,
|
|
"grad_norm": 0.22602023572874996,
|
|
"learning_rate": 4.5558796270706254e-07,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09014368057250977,
|
|
"step": 645,
|
|
"valid_targets_mean": 5677.8,
|
|
"valid_targets_min": 2555
|
|
},
|
|
{
|
|
"epoch": 6.63481228668942,
|
|
"grad_norm": 0.24090264289449,
|
|
"learning_rate": 3.5387344984600946e-07,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08535467833280563,
|
|
"step": 650,
|
|
"valid_targets_mean": 4994.6,
|
|
"valid_targets_min": 2472
|
|
},
|
|
{
|
|
"epoch": 6.686006825938566,
|
|
"grad_norm": 0.20794321625955012,
|
|
"learning_rate": 2.64891707332966e-07,
|
|
"loss": 0.2494,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09163576364517212,
|
|
"step": 655,
|
|
"valid_targets_mean": 5890.2,
|
|
"valid_targets_min": 2276
|
|
},
|
|
{
|
|
"epoch": 6.737201365187714,
|
|
"grad_norm": 0.22007475356230136,
|
|
"learning_rate": 1.887004047594232e-07,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08739501237869263,
|
|
"step": 660,
|
|
"valid_targets_mean": 5521.7,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 6.78839590443686,
|
|
"grad_norm": 0.25286299855724226,
|
|
"learning_rate": 1.2534892215740667e-07,
|
|
"loss": 0.2517,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10239039361476898,
|
|
"step": 665,
|
|
"valid_targets_mean": 5944.1,
|
|
"valid_targets_min": 393
|
|
},
|
|
{
|
|
"epoch": 6.839590443686006,
|
|
"grad_norm": 0.24536425918506533,
|
|
"learning_rate": 7.487831799597889e-08,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08520665764808655,
|
|
"step": 670,
|
|
"valid_targets_mean": 5079.4,
|
|
"valid_targets_min": 1871
|
|
},
|
|
{
|
|
"epoch": 6.890784982935154,
|
|
"grad_norm": 0.22126404698942398,
|
|
"learning_rate": 3.73213025710073e-08,
|
|
"loss": 0.2511,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08079634606838226,
|
|
"step": 675,
|
|
"valid_targets_mean": 5323.9,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 6.9419795221843,
|
|
"grad_norm": 0.23165959369746986,
|
|
"learning_rate": 1.2702216805431377e-08,
|
|
"loss": 0.257,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08266259729862213,
|
|
"step": 680,
|
|
"valid_targets_mean": 5277.4,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 6.993174061433447,
|
|
"grad_norm": 0.2427793642183869,
|
|
"learning_rate": 1.037016473757202e-09,
|
|
"loss": 0.2553,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08131192624568939,
|
|
"step": 685,
|
|
"valid_targets_mean": 5002.9,
|
|
"valid_targets_min": 281
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11108480393886566,
|
|
"step": 686,
|
|
"total_flos": 2.601520842818978e+18,
|
|
"train_loss": 0.2925965665424183,
|
|
"train_runtime": 15951.2631,
|
|
"train_samples_per_second": 4.106,
|
|
"train_steps_per_second": 0.043,
|
|
"valid_targets_mean": 4441.2,
|
|
"valid_targets_min": 1797
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 686,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.601520842818978e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|