Files
nemotron-terminal-system_ad…/trainer_state.json
ModelHub XC 041a9f0b3f 初始化项目,由ModelHub XC社区提供模型
Model: laion/nemotron-terminal-system_administration__Qwen3-8B
Source: Original Platform
2026-04-22 18:49:08 +08:00

1287 lines
36 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 567,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06172839506172839,
"grad_norm": 10.429516590233682,
"learning_rate": 2.8070175438596493e-06,
"loss": 0.8393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2808097004890442,
"step": 5,
"valid_targets_mean": 9607.8,
"valid_targets_min": 3068
},
{
"epoch": 0.12345679012345678,
"grad_norm": 5.065259502819224,
"learning_rate": 6.31578947368421e-06,
"loss": 0.7853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2494523823261261,
"step": 10,
"valid_targets_mean": 9841.2,
"valid_targets_min": 5719
},
{
"epoch": 0.18518518518518517,
"grad_norm": 1.6595151332203468,
"learning_rate": 9.824561403508772e-06,
"loss": 0.7006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23369759321212769,
"step": 15,
"valid_targets_mean": 10128.8,
"valid_targets_min": 7342
},
{
"epoch": 0.24691358024691357,
"grad_norm": 1.2957924076936072,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.6693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21334174275398254,
"step": 20,
"valid_targets_mean": 9463.6,
"valid_targets_min": 4593
},
{
"epoch": 0.30864197530864196,
"grad_norm": 0.7601632497112111,
"learning_rate": 1.6842105263157896e-05,
"loss": 0.6305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20799314975738525,
"step": 25,
"valid_targets_mean": 11056.0,
"valid_targets_min": 7081
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.5556757386274306,
"learning_rate": 2.035087719298246e-05,
"loss": 0.6038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19161568582057953,
"step": 30,
"valid_targets_mean": 9825.4,
"valid_targets_min": 6376
},
{
"epoch": 0.43209876543209874,
"grad_norm": 0.5153662825148531,
"learning_rate": 2.385964912280702e-05,
"loss": 0.5804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19309845566749573,
"step": 35,
"valid_targets_mean": 10027.2,
"valid_targets_min": 5491
},
{
"epoch": 0.49382716049382713,
"grad_norm": 0.4106317408119749,
"learning_rate": 2.7368421052631583e-05,
"loss": 0.5557,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1776849925518036,
"step": 40,
"valid_targets_mean": 9618.7,
"valid_targets_min": 3252
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.35411307929247426,
"learning_rate": 3.087719298245614e-05,
"loss": 0.5349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17469243705272675,
"step": 45,
"valid_targets_mean": 10166.2,
"valid_targets_min": 6091
},
{
"epoch": 0.6172839506172839,
"grad_norm": 0.3028156061226061,
"learning_rate": 3.43859649122807e-05,
"loss": 0.5171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16911481320858002,
"step": 50,
"valid_targets_mean": 10112.2,
"valid_targets_min": 1278
},
{
"epoch": 0.6790123456790124,
"grad_norm": 0.24266635032746242,
"learning_rate": 3.789473684210526e-05,
"loss": 0.5033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15108071267604828,
"step": 55,
"valid_targets_mean": 9476.6,
"valid_targets_min": 405
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.225540686496095,
"learning_rate": 3.999848220229662e-05,
"loss": 0.4919,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1633068025112152,
"step": 60,
"valid_targets_mean": 10242.9,
"valid_targets_min": 6762
},
{
"epoch": 0.8024691358024691,
"grad_norm": 0.233196689176046,
"learning_rate": 3.998140962368987e-05,
"loss": 0.479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15809178352355957,
"step": 65,
"valid_targets_mean": 10012.9,
"valid_targets_min": 5638
},
{
"epoch": 0.8641975308641975,
"grad_norm": 0.22642340209998182,
"learning_rate": 3.994538346771576e-05,
"loss": 0.4669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.145830899477005,
"step": 70,
"valid_targets_mean": 10058.6,
"valid_targets_min": 7540
},
{
"epoch": 0.9259259259259259,
"grad_norm": 0.21957679869413865,
"learning_rate": 3.989043790736547e-05,
"loss": 0.4607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14295609295368195,
"step": 75,
"valid_targets_mean": 9880.9,
"valid_targets_min": 4567
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.259082441417068,
"learning_rate": 3.9816625061831206e-05,
"loss": 0.4539,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14566341042518616,
"step": 80,
"valid_targets_mean": 10036.9,
"valid_targets_min": 6974
},
{
"epoch": 1.0493827160493827,
"grad_norm": 0.29381252653535816,
"learning_rate": 3.972401494706805e-05,
"loss": 0.4489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14456677436828613,
"step": 85,
"valid_targets_mean": 10007.5,
"valid_targets_min": 6170
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.2514362103454548,
"learning_rate": 3.9612695409379555e-05,
"loss": 0.4473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.156326562166214,
"step": 90,
"valid_targets_mean": 10273.3,
"valid_targets_min": 6712
},
{
"epoch": 1.1728395061728394,
"grad_norm": 0.24751778888652187,
"learning_rate": 3.948277204209021e-05,
"loss": 0.4372,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14379754662513733,
"step": 95,
"valid_targets_mean": 10190.5,
"valid_targets_min": 5246
},
{
"epoch": 1.2345679012345678,
"grad_norm": 0.2494831866139906,
"learning_rate": 3.933436808538375e-05,
"loss": 0.4398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13491873443126678,
"step": 100,
"valid_targets_mean": 9260.9,
"valid_targets_min": 5003
},
{
"epoch": 1.2962962962962963,
"grad_norm": 0.24159264180274795,
"learning_rate": 3.916762430940245e-05,
"loss": 0.4271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1485365480184555,
"step": 105,
"valid_targets_mean": 10453.7,
"valid_targets_min": 6676
},
{
"epoch": 1.3580246913580247,
"grad_norm": 0.240571750078312,
"learning_rate": 3.898269888071803e-05,
"loss": 0.4294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1464928835630417,
"step": 110,
"valid_targets_mean": 10115.0,
"valid_targets_min": 321
},
{
"epoch": 1.4197530864197532,
"grad_norm": 0.23171615291838898,
"learning_rate": 3.877976721230114e-05,
"loss": 0.4238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13552333414554596,
"step": 115,
"valid_targets_mean": 10116.0,
"valid_targets_min": 4165
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.23123951273615284,
"learning_rate": 3.85590217971315e-05,
"loss": 0.4247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1396927833557129,
"step": 120,
"valid_targets_mean": 9472.6,
"valid_targets_min": 3369
},
{
"epoch": 1.5432098765432098,
"grad_norm": 0.27876028230350475,
"learning_rate": 3.832067202560668e-05,
"loss": 0.4182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14294642210006714,
"step": 125,
"valid_targets_mean": 9702.7,
"valid_targets_min": 3563
},
{
"epoch": 1.6049382716049383,
"grad_norm": 0.24112690266489265,
"learning_rate": 3.806494398692258e-05,
"loss": 0.4223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1513240933418274,
"step": 130,
"valid_targets_mean": 10773.1,
"valid_targets_min": 6916
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.28682215602360966,
"learning_rate": 3.77920802546142e-05,
"loss": 0.4225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13537049293518066,
"step": 135,
"valid_targets_mean": 8778.2,
"valid_targets_min": 4773
},
{
"epoch": 1.7283950617283952,
"grad_norm": 0.22939970462745563,
"learning_rate": 3.750233965645985e-05,
"loss": 0.4182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1321515440940857,
"step": 140,
"valid_targets_mean": 9729.9,
"valid_targets_min": 4328
},
{
"epoch": 1.7901234567901234,
"grad_norm": 0.2532856415818664,
"learning_rate": 3.719599702896745e-05,
"loss": 0.4163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13605017960071564,
"step": 145,
"valid_targets_mean": 10582.2,
"valid_targets_min": 7105
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.21865623660828168,
"learning_rate": 3.687334295667533e-05,
"loss": 0.4135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1345449984073639,
"step": 150,
"valid_targets_mean": 9758.0,
"valid_targets_min": 3245
},
{
"epoch": 1.9135802469135803,
"grad_norm": 0.22032076579466348,
"learning_rate": 3.653468349651527e-05,
"loss": 0.4082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1432110071182251,
"step": 155,
"valid_targets_mean": 10140.4,
"valid_targets_min": 3954
},
{
"epoch": 1.9753086419753085,
"grad_norm": 0.2300084517459775,
"learning_rate": 3.6180339887498953e-05,
"loss": 0.4116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14358800649642944,
"step": 160,
"valid_targets_mean": 10277.0,
"valid_targets_min": 5517
},
{
"epoch": 2.037037037037037,
"grad_norm": 0.26725994957529775,
"learning_rate": 3.581064824600327e-05,
"loss": 0.4073,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12686286866664886,
"step": 165,
"valid_targets_mean": 9887.2,
"valid_targets_min": 6671
},
{
"epoch": 2.0987654320987654,
"grad_norm": 0.2839301734419817,
"learning_rate": 3.542595924694362e-05,
"loss": 0.4006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12411850690841675,
"step": 170,
"valid_targets_mean": 10430.6,
"valid_targets_min": 4962
},
{
"epoch": 2.1604938271604937,
"grad_norm": 0.23886149599663145,
"learning_rate": 3.502663779113747e-05,
"loss": 0.4057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1273406594991684,
"step": 175,
"valid_targets_mean": 9780.4,
"valid_targets_min": 6883
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.2508286434733983,
"learning_rate": 3.4613062659173865e-05,
"loss": 0.4005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13528917729854584,
"step": 180,
"valid_targets_mean": 10432.4,
"valid_targets_min": 2672
},
{
"epoch": 2.2839506172839505,
"grad_norm": 0.28154810246127465,
"learning_rate": 3.418562615211707e-05,
"loss": 0.3986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13502192497253418,
"step": 185,
"valid_targets_mean": 9436.0,
"valid_targets_min": 321
},
{
"epoch": 2.3456790123456788,
"grad_norm": 0.25037886356869404,
"learning_rate": 3.374473371938526e-05,
"loss": 0.4055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13342218101024628,
"step": 190,
"valid_targets_mean": 10217.9,
"valid_targets_min": 5884
},
{
"epoch": 2.4074074074074074,
"grad_norm": 0.22241283286592672,
"learning_rate": 3.329080357415716e-05,
"loss": 0.4011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13278549909591675,
"step": 195,
"valid_targets_mean": 10201.8,
"valid_targets_min": 5485
},
{
"epoch": 2.4691358024691357,
"grad_norm": 0.25914481987156485,
"learning_rate": 3.282426629667157e-05,
"loss": 0.4,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13765950500965118,
"step": 200,
"valid_targets_mean": 9836.8,
"valid_targets_min": 1763
},
{
"epoch": 2.5308641975308643,
"grad_norm": 0.2678736062298823,
"learning_rate": 3.234556442579586e-05,
"loss": 0.3963,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1374882459640503,
"step": 205,
"valid_targets_mean": 9808.9,
"valid_targets_min": 5540
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.22739006549004376,
"learning_rate": 3.18551520392511e-05,
"loss": 0.3911,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11978818476200104,
"step": 210,
"valid_targets_mean": 9023.9,
"valid_targets_min": 5865
},
{
"epoch": 2.6543209876543212,
"grad_norm": 0.2480434833866747,
"learning_rate": 3.1353494322891806e-05,
"loss": 0.3967,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13371124863624573,
"step": 215,
"valid_targets_mean": 10042.4,
"valid_targets_min": 5992
},
{
"epoch": 2.7160493827160495,
"grad_norm": 0.2415014429826874,
"learning_rate": 3.084106712944899e-05,
"loss": 0.3924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11986161768436432,
"step": 220,
"valid_targets_mean": 8822.5,
"valid_targets_min": 5914
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.22748383065140926,
"learning_rate": 3.0318356527155024e-05,
"loss": 0.3959,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13959044218063354,
"step": 225,
"valid_targets_mean": 11017.7,
"valid_targets_min": 7406
},
{
"epoch": 2.8395061728395063,
"grad_norm": 0.22579559038440425,
"learning_rate": 2.9785858338678474e-05,
"loss": 0.3958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1356910616159439,
"step": 230,
"valid_targets_mean": 10416.5,
"valid_targets_min": 5935
},
{
"epoch": 2.9012345679012346,
"grad_norm": 0.259395872875029,
"learning_rate": 2.924407767080627e-05,
"loss": 0.3952,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13255149126052856,
"step": 235,
"valid_targets_mean": 10616.9,
"valid_targets_min": 452
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.2505789103446424,
"learning_rate": 2.8693528435319304e-05,
"loss": 0.3927,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13311100006103516,
"step": 240,
"valid_targets_mean": 10259.8,
"valid_targets_min": 5469
},
{
"epoch": 3.0246913580246915,
"grad_norm": 0.25985101425137663,
"learning_rate": 2.813473286151601e-05,
"loss": 0.3877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1392827033996582,
"step": 245,
"valid_targets_mean": 10576.5,
"valid_targets_min": 6382
},
{
"epoch": 3.0864197530864197,
"grad_norm": 0.2523285023282264,
"learning_rate": 2.756822100084621e-05,
"loss": 0.3882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13310596346855164,
"step": 250,
"valid_targets_mean": 10070.9,
"valid_targets_min": 5036
},
{
"epoch": 3.148148148148148,
"grad_norm": 0.24591905608450526,
"learning_rate": 2.6994530224125225e-05,
"loss": 0.3876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12935864925384521,
"step": 255,
"valid_targets_mean": 9908.3,
"valid_targets_min": 5504
},
{
"epoch": 3.2098765432098766,
"grad_norm": 0.23137220587321322,
"learning_rate": 2.6414204711805106e-05,
"loss": 0.3827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12253047525882721,
"step": 260,
"valid_targets_mean": 9940.8,
"valid_targets_min": 5580
},
{
"epoch": 3.271604938271605,
"grad_norm": 0.22538191153928455,
"learning_rate": 2.5827794937786497e-05,
"loss": 0.3896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12234168499708176,
"step": 265,
"valid_targets_mean": 9346.2,
"valid_targets_min": 2359
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.23311402995020197,
"learning_rate": 2.523585714726081e-05,
"loss": 0.3879,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1369541585445404,
"step": 270,
"valid_targets_mean": 10217.0,
"valid_targets_min": 5332
},
{
"epoch": 3.3950617283950617,
"grad_norm": 0.2514619172750129,
"learning_rate": 2.4638952829077964e-05,
"loss": 0.3823,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1313236802816391,
"step": 275,
"valid_targets_mean": 10161.1,
"valid_targets_min": 5918
},
{
"epoch": 3.45679012345679,
"grad_norm": 0.21958361450520822,
"learning_rate": 2.4037648183140205e-05,
"loss": 0.3813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12724569439888,
"step": 280,
"valid_targets_mean": 10332.4,
"valid_targets_min": 6695
},
{
"epoch": 3.5185185185185186,
"grad_norm": 0.2059815034095156,
"learning_rate": 2.3432513583327198e-05,
"loss": 0.3789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12159313261508942,
"step": 285,
"valid_targets_mean": 9543.0,
"valid_targets_min": 5540
},
{
"epoch": 3.580246913580247,
"grad_norm": 0.2172755088091282,
"learning_rate": 2.282412303646183e-05,
"loss": 0.3824,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13618609309196472,
"step": 290,
"valid_targets_mean": 10702.6,
"valid_targets_min": 5676
},
{
"epoch": 3.6419753086419755,
"grad_norm": 0.23724659970939752,
"learning_rate": 2.2213053637830016e-05,
"loss": 0.3827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13978168368339539,
"step": 295,
"valid_targets_mean": 10434.6,
"valid_targets_min": 6350
},
{
"epoch": 3.7037037037037037,
"grad_norm": 0.21912152242726737,
"learning_rate": 2.1599885023770833e-05,
"loss": 0.3791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1349799931049347,
"step": 300,
"valid_targets_mean": 10565.1,
"valid_targets_min": 6091
},
{
"epoch": 3.765432098765432,
"grad_norm": 0.23294883680524386,
"learning_rate": 2.098519882185634e-05,
"loss": 0.3835,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12474770843982697,
"step": 305,
"valid_targets_mean": 9782.5,
"valid_targets_min": 6639
},
{
"epoch": 3.8271604938271606,
"grad_norm": 0.210413727788084,
"learning_rate": 2.03695780991826e-05,
"loss": 0.3793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12241166830062866,
"step": 310,
"valid_targets_mean": 9305.9,
"valid_targets_min": 452
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.2336083067719978,
"learning_rate": 1.9753606809295234e-05,
"loss": 0.3817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1337265968322754,
"step": 315,
"valid_targets_mean": 10231.2,
"valid_targets_min": 5605
},
{
"epoch": 3.950617283950617,
"grad_norm": 0.22993388632584225,
"learning_rate": 1.9137869238274095e-05,
"loss": 0.3867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12258920818567276,
"step": 320,
"valid_targets_mean": 9413.1,
"valid_targets_min": 3903
},
{
"epoch": 4.012345679012346,
"grad_norm": 0.23931040818992602,
"learning_rate": 1.8522949450502522e-05,
"loss": 0.385,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11768842488527298,
"step": 325,
"valid_targets_mean": 10013.8,
"valid_targets_min": 6307
},
{
"epoch": 4.074074074074074,
"grad_norm": 0.23366624804084687,
"learning_rate": 1.7909430734646936e-05,
"loss": 0.3755,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12187112867832184,
"step": 330,
"valid_targets_mean": 9138.8,
"valid_targets_min": 405
},
{
"epoch": 4.135802469135802,
"grad_norm": 0.2386159155105413,
"learning_rate": 1.7297895050372147e-05,
"loss": 0.3783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11817651987075806,
"step": 335,
"valid_targets_mean": 9925.3,
"valid_targets_min": 5315
},
{
"epoch": 4.197530864197531,
"grad_norm": 0.24141329497025246,
"learning_rate": 1.66889224763174e-05,
"loss": 0.3786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12400516867637634,
"step": 340,
"valid_targets_mean": 10173.1,
"valid_targets_min": 6521
},
{
"epoch": 4.2592592592592595,
"grad_norm": 0.21961371398861715,
"learning_rate": 1.6083090659856665e-05,
"loss": 0.3769,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12499363720417023,
"step": 345,
"valid_targets_mean": 9886.8,
"valid_targets_min": 3903
},
{
"epoch": 4.320987654320987,
"grad_norm": 0.20691809860193006,
"learning_rate": 1.5480974269165053e-05,
"loss": 0.3765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12656159698963165,
"step": 350,
"valid_targets_mean": 10278.9,
"valid_targets_min": 5947
},
{
"epoch": 4.382716049382716,
"grad_norm": 0.2080579551581286,
"learning_rate": 1.4883144448111288e-05,
"loss": 0.3756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12579810619354248,
"step": 355,
"valid_targets_mean": 9957.8,
"valid_targets_min": 6975
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.22870148650025382,
"learning_rate": 1.4290168274493161e-05,
"loss": 0.3743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12021129578351974,
"step": 360,
"valid_targets_mean": 9520.9,
"valid_targets_min": 4051
},
{
"epoch": 4.506172839506172,
"grad_norm": 0.21233421060501143,
"learning_rate": 1.3702608222129845e-05,
"loss": 0.3785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13181990385055542,
"step": 365,
"valid_targets_mean": 9865.2,
"valid_targets_min": 2359
},
{
"epoch": 4.567901234567901,
"grad_norm": 0.21174790278305963,
"learning_rate": 1.3121021627321438e-05,
"loss": 0.3762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11894433945417404,
"step": 370,
"valid_targets_mean": 9994.8,
"valid_targets_min": 6103
},
{
"epoch": 4.62962962962963,
"grad_norm": 0.22880042834392986,
"learning_rate": 1.254596016018172e-05,
"loss": 0.3718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11400240659713745,
"step": 375,
"valid_targets_mean": 9684.1,
"valid_targets_min": 1809
},
{
"epoch": 4.6913580246913575,
"grad_norm": 0.20714503394784406,
"learning_rate": 1.1977969301345627e-05,
"loss": 0.3741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12407234311103821,
"step": 380,
"valid_targets_mean": 10558.5,
"valid_targets_min": 3563
},
{
"epoch": 4.753086419753086,
"grad_norm": 0.2245999439376979,
"learning_rate": 1.1417587824547822e-05,
"loss": 0.3756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11711939424276352,
"step": 385,
"valid_targets_mean": 10091.3,
"valid_targets_min": 5831
},
{
"epoch": 4.814814814814815,
"grad_norm": 0.19667919626855399,
"learning_rate": 1.086534728556319e-05,
"loss": 0.3701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1273461878299713,
"step": 390,
"valid_targets_mean": 9572.9,
"valid_targets_min": 4151
},
{
"epoch": 4.8765432098765435,
"grad_norm": 0.21765308723193594,
"learning_rate": 1.032177151799397e-05,
"loss": 0.3723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12290293723344803,
"step": 395,
"valid_targets_mean": 10243.7,
"valid_targets_min": 403
},
{
"epoch": 4.938271604938271,
"grad_norm": 0.2135466709612721,
"learning_rate": 9.787376136381866e-06,
"loss": 0.3801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12904667854309082,
"step": 400,
"valid_targets_mean": 10656.8,
"valid_targets_min": 5102
},
{
"epoch": 5.0,
"grad_norm": 0.2361103567960002,
"learning_rate": 9.262668047116399e-06,
"loss": 0.3742,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11932142823934555,
"step": 405,
"valid_targets_mean": 9562.3,
"valid_targets_min": 5152
},
{
"epoch": 5.061728395061729,
"grad_norm": 0.19675209800473384,
"learning_rate": 8.748144967603538e-06,
"loss": 0.3748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12642104923725128,
"step": 410,
"valid_targets_mean": 9635.7,
"valid_targets_min": 3304
},
{
"epoch": 5.1234567901234565,
"grad_norm": 0.19935243749779888,
"learning_rate": 8.24429495415054e-06,
"loss": 0.3701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1311061680316925,
"step": 415,
"valid_targets_mean": 10193.9,
"valid_targets_min": 4731
},
{
"epoch": 5.185185185185185,
"grad_norm": 0.20563411967663517,
"learning_rate": 7.751595939015005e-06,
"loss": 0.3648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12725985050201416,
"step": 420,
"valid_targets_mean": 9884.4,
"valid_targets_min": 6098
},
{
"epoch": 5.246913580246914,
"grad_norm": 0.23745531664425062,
"learning_rate": 7.270515277057178e-06,
"loss": 0.3704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12347559630870819,
"step": 425,
"valid_targets_mean": 10205.5,
"valid_targets_min": 4462
},
{
"epoch": 5.308641975308642,
"grad_norm": 0.1974181070193832,
"learning_rate": 6.801509302425553e-06,
"loss": 0.3728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13114777207374573,
"step": 430,
"valid_targets_mean": 10419.2,
"valid_targets_min": 6888
},
{
"epoch": 5.37037037037037,
"grad_norm": 0.20324970189648778,
"learning_rate": 6.3450228956962915e-06,
"loss": 0.3697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12115702033042908,
"step": 435,
"valid_targets_mean": 9735.0,
"valid_targets_min": 452
},
{
"epoch": 5.432098765432099,
"grad_norm": 0.21598912192747013,
"learning_rate": 5.90148906187706e-06,
"loss": 0.3716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1321217566728592,
"step": 440,
"valid_targets_mean": 10511.7,
"valid_targets_min": 6367
},
{
"epoch": 5.493827160493828,
"grad_norm": 0.18869463173340542,
"learning_rate": 5.471328519675521e-06,
"loss": 0.3706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11856191605329514,
"step": 445,
"valid_targets_mean": 10337.3,
"valid_targets_min": 6856
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.3057251506093963,
"learning_rate": 5.054949302422178e-06,
"loss": 0.374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1269569993019104,
"step": 450,
"valid_targets_mean": 10418.6,
"valid_targets_min": 7562
},
{
"epoch": 5.617283950617284,
"grad_norm": 0.18558587270619328,
"learning_rate": 4.65274637102606e-06,
"loss": 0.37,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12654009461402893,
"step": 455,
"valid_targets_mean": 10451.2,
"valid_targets_min": 6890
},
{
"epoch": 5.679012345679013,
"grad_norm": 0.18925832500358447,
"learning_rate": 4.265101239330336e-06,
"loss": 0.3692,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11512520909309387,
"step": 460,
"valid_targets_mean": 9696.8,
"valid_targets_min": 6270
},
{
"epoch": 5.7407407407407405,
"grad_norm": 0.2123661843964898,
"learning_rate": 3.892381612223348e-06,
"loss": 0.3731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12849582731723785,
"step": 465,
"valid_targets_mean": 9772.2,
"valid_targets_min": 5044
},
{
"epoch": 5.802469135802469,
"grad_norm": 0.19388821506347134,
"learning_rate": 3.534941036848258e-06,
"loss": 0.3697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11666221916675568,
"step": 470,
"valid_targets_mean": 9419.0,
"valid_targets_min": 5724
},
{
"epoch": 5.864197530864198,
"grad_norm": 0.19304850153428105,
"learning_rate": 3.193118567242148e-06,
"loss": 0.3717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12014692276716232,
"step": 475,
"valid_targets_mean": 10304.2,
"valid_targets_min": 6152
},
{
"epoch": 5.925925925925926,
"grad_norm": 0.20024207191873417,
"learning_rate": 2.8672384427227484e-06,
"loss": 0.3728,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13074594736099243,
"step": 480,
"valid_targets_mean": 10167.5,
"valid_targets_min": 4046
},
{
"epoch": 5.987654320987654,
"grad_norm": 0.18822272097106288,
"learning_rate": 2.5576097803277833e-06,
"loss": 0.3705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12458598613739014,
"step": 485,
"valid_targets_mean": 9746.9,
"valid_targets_min": 5329
},
{
"epoch": 6.049382716049383,
"grad_norm": 0.1920896017408439,
"learning_rate": 2.264526281598762e-06,
"loss": 0.3731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11540110409259796,
"step": 490,
"valid_targets_mean": 9151.7,
"valid_targets_min": 2545
},
{
"epoch": 6.111111111111111,
"grad_norm": 0.1963272832970611,
"learning_rate": 1.988265953987254e-06,
"loss": 0.3719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1279304027557373,
"step": 495,
"valid_targets_mean": 10241.9,
"valid_targets_min": 6053
},
{
"epoch": 6.172839506172839,
"grad_norm": 0.19192168324701758,
"learning_rate": 1.7290908471479805e-06,
"loss": 0.3697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11920693516731262,
"step": 500,
"valid_targets_mean": 9656.2,
"valid_targets_min": 3898
},
{
"epoch": 6.234567901234568,
"grad_norm": 0.1838675547975408,
"learning_rate": 1.487246804368876e-06,
"loss": 0.3665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1233929991722107,
"step": 505,
"valid_targets_mean": 10345.6,
"valid_targets_min": 5701
},
{
"epoch": 6.296296296296296,
"grad_norm": 0.18797572906177723,
"learning_rate": 1.2629632293737903e-06,
"loss": 0.3707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12090076506137848,
"step": 510,
"valid_targets_mean": 9881.3,
"valid_targets_min": 6218
},
{
"epoch": 6.3580246913580245,
"grad_norm": 0.20133771310875587,
"learning_rate": 1.0564528687191954e-06,
"loss": 0.3745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12166153639554977,
"step": 515,
"valid_targets_mean": 9838.0,
"valid_targets_min": 5384
},
{
"epoch": 6.419753086419753,
"grad_norm": 0.19437281332257697,
"learning_rate": 8.679116099911855e-07,
"loss": 0.3682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12425738573074341,
"step": 520,
"valid_targets_mean": 10094.1,
"valid_targets_min": 5805
},
{
"epoch": 6.481481481481482,
"grad_norm": 0.20313423058358762,
"learning_rate": 6.975182959942195e-07,
"loss": 0.3647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12808644771575928,
"step": 525,
"valid_targets_mean": 10183.5,
"valid_targets_min": 6429
},
{
"epoch": 6.54320987654321,
"grad_norm": 0.19806939309627028,
"learning_rate": 5.454345551079044e-07,
"loss": 0.371,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11769580841064453,
"step": 530,
"valid_targets_mean": 10095.5,
"valid_targets_min": 6403
},
{
"epoch": 6.604938271604938,
"grad_norm": 0.19162098246941137,
"learning_rate": 4.118046479726823e-07,
"loss": 0.3693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12202047556638718,
"step": 535,
"valid_targets_mean": 10323.8,
"valid_targets_min": 6837
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.18761681557750176,
"learning_rate": 2.9675533064986937e-07,
"loss": 0.3699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1217864602804184,
"step": 540,
"valid_targets_mean": 9897.5,
"valid_targets_min": 5402
},
{
"epoch": 6.728395061728395,
"grad_norm": 0.19040954143447755,
"learning_rate": 2.0039573438586091e-07,
"loss": 0.3682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1232626885175705,
"step": 545,
"valid_targets_mean": 10269.8,
"valid_targets_min": 6322
},
{
"epoch": 6.790123456790123,
"grad_norm": 0.1840751374024284,
"learning_rate": 1.2281726209452782e-07,
"loss": 0.3704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1274663656949997,
"step": 550,
"valid_targets_mean": 10375.0,
"valid_targets_min": 5210
},
{
"epoch": 6.851851851851852,
"grad_norm": 0.1884444000769013,
"learning_rate": 6.409350165601957e-08,
"loss": 0.3645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11835850775241852,
"step": 555,
"valid_targets_mean": 9355.1,
"valid_targets_min": 5793
},
{
"epoch": 6.91358024691358,
"grad_norm": 0.18733829002548932,
"learning_rate": 2.4280156114202537e-08,
"loss": 0.3741,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12829899787902832,
"step": 560,
"valid_targets_mean": 9559.5,
"valid_targets_min": 4937
},
{
"epoch": 6.9753086419753085,
"grad_norm": 0.1898364388549884,
"learning_rate": 3.414990838945809e-09,
"loss": 0.3617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.122779980301857,
"step": 565,
"valid_targets_mean": 10042.1,
"valid_targets_min": 5310
},
{
"epoch": 7.0,
"step": 567,
"total_flos": 2.4619618330916946e+18,
"train_loss": 0.0,
"train_runtime": 6.5836,
"train_samples_per_second": 8267.793,
"train_steps_per_second": 86.123
}
],
"logging_steps": 5,
"max_steps": 567,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.4619618330916946e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}