Files
grpo-tool-sat-sft-qwen3-1p7…/trainer_state.json
ModelHub XC c1b860e80a 初始化项目,由ModelHub XC社区提供模型
Model: raca-workspace-v1/grpo-tool-sat-sft-qwen3-1p7b-sft-20260419-075623-96e9
Source: Original Platform
2026-05-28 09:48:20 +08:00

324 lines
7.2 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 11.879703521728516,
"learning_rate": 1.6000000000000003e-05,
"loss": 2.3428,
"step": 25
},
{
"epoch": 0.1,
"grad_norm": 5.421560287475586,
"learning_rate": 1.998107236150145e-05,
"loss": 0.7368,
"step": 50
},
{
"epoch": 0.15,
"grad_norm": 2.313567876815796,
"learning_rate": 1.989863301061654e-05,
"loss": 0.5067,
"step": 75
},
{
"epoch": 0.2,
"grad_norm": 2.5316965579986572,
"learning_rate": 1.9751334064160708e-05,
"loss": 0.4279,
"step": 100
},
{
"epoch": 0.25,
"grad_norm": 2.5976717472076416,
"learning_rate": 1.9540140680664915e-05,
"loss": 0.419,
"step": 125
},
{
"epoch": 0.3,
"grad_norm": 1.39866304397583,
"learning_rate": 1.9266436679230866e-05,
"loss": 0.4207,
"step": 150
},
{
"epoch": 0.35,
"grad_norm": 1.8767157793045044,
"learning_rate": 1.8932015472223692e-05,
"loss": 0.4169,
"step": 175
},
{
"epoch": 0.4,
"grad_norm": 2.209038734436035,
"learning_rate": 1.8539068314154355e-05,
"loss": 0.4185,
"step": 200
},
{
"epoch": 0.45,
"grad_norm": 2.1389389038085938,
"learning_rate": 1.8090169943749477e-05,
"loss": 0.4197,
"step": 225
},
{
"epoch": 0.5,
"grad_norm": 1.957894206047058,
"learning_rate": 1.758826171328727e-05,
"loss": 0.42,
"step": 250
},
{
"epoch": 0.55,
"grad_norm": 1.7354559898376465,
"learning_rate": 1.7036632315742464e-05,
"loss": 0.4209,
"step": 275
},
{
"epoch": 0.6,
"grad_norm": 1.4375723600387573,
"learning_rate": 1.6438896236023374e-05,
"loss": 0.4168,
"step": 300
},
{
"epoch": 0.65,
"grad_norm": 1.7558571100234985,
"learning_rate": 1.57989700674967e-05,
"loss": 0.4155,
"step": 325
},
{
"epoch": 0.7,
"grad_norm": 1.6743788719177246,
"learning_rate": 1.512104684898319e-05,
"loss": 0.4171,
"step": 350
},
{
"epoch": 0.75,
"grad_norm": 1.1145944595336914,
"learning_rate": 1.4409568590377918e-05,
"loss": 0.4143,
"step": 375
},
{
"epoch": 0.8,
"grad_norm": 2.5990407466888428,
"learning_rate": 1.3669197166917723e-05,
"loss": 0.4144,
"step": 400
},
{
"epoch": 0.85,
"grad_norm": 2.7390551567077637,
"learning_rate": 1.2904783772807534e-05,
"loss": 0.4152,
"step": 425
},
{
"epoch": 0.9,
"grad_norm": 1.1414133310317993,
"learning_rate": 1.2121337134357121e-05,
"loss": 0.4161,
"step": 450
},
{
"epoch": 0.95,
"grad_norm": 1.2553099393844604,
"learning_rate": 1.1323990690907734e-05,
"loss": 0.4154,
"step": 475
},
{
"epoch": 1.0,
"grad_norm": 1.7206685543060303,
"learning_rate": 1.0517968958591705e-05,
"loss": 0.4142,
"step": 500
},
{
"epoch": 1.05,
"grad_norm": 1.1427602767944336,
"learning_rate": 9.708553297322407e-06,
"loss": 0.4138,
"step": 525
},
{
"epoch": 1.1,
"grad_norm": 1.4284802675247192,
"learning_rate": 8.901047305322172e-06,
"loss": 0.4142,
"step": 550
},
{
"epoch": 1.15,
"grad_norm": 1.6178677082061768,
"learning_rate": 8.100742067936432e-06,
"loss": 0.4138,
"step": 575
},
{
"epoch": 1.2,
"grad_norm": 1.2968213558197021,
"learning_rate": 7.312881488436928e-06,
"loss": 0.4125,
"step": 600
},
{
"epoch": 1.25,
"grad_norm": 1.2272377014160156,
"learning_rate": 6.542627927979772e-06,
"loss": 0.4108,
"step": 625
},
{
"epoch": 1.3,
"grad_norm": 1.5362012386322021,
"learning_rate": 5.795028379858355e-06,
"loss": 0.4112,
"step": 650
},
{
"epoch": 1.35,
"grad_norm": 1.4780094623565674,
"learning_rate": 5.074981399690219e-06,
"loss": 0.413,
"step": 675
},
{
"epoch": 1.4,
"grad_norm": 2.2684173583984375,
"learning_rate": 4.3872050082238535e-06,
"loss": 0.4136,
"step": 700
},
{
"epoch": 1.45,
"grad_norm": 1.7533961534500122,
"learning_rate": 3.736205777078381e-06,
"loss": 0.4115,
"step": 725
},
{
"epoch": 1.5,
"grad_norm": 2.0084352493286133,
"learning_rate": 3.126249299978086e-06,
"loss": 0.4125,
"step": 750
},
{
"epoch": 1.55,
"grad_norm": 1.6931928396224976,
"learning_rate": 2.5613322429654573e-06,
"loss": 0.4122,
"step": 775
},
{
"epoch": 1.6,
"grad_norm": 1.7414414882659912,
"learning_rate": 2.0451561567303378e-06,
"loss": 0.412,
"step": 800
},
{
"epoch": 1.65,
"grad_norm": 1.7203795909881592,
"learning_rate": 1.5811032226467304e-06,
"loss": 0.4123,
"step": 825
},
{
"epoch": 1.7,
"grad_norm": 2.252686023712158,
"learning_rate": 1.1722140914384162e-06,
"loss": 0.4106,
"step": 850
},
{
"epoch": 1.75,
"grad_norm": 1.8625959157943726,
"learning_rate": 8.211679596828481e-07,
"loss": 0.4118,
"step": 875
},
{
"epoch": 1.8,
"grad_norm": 1.6265811920166016,
"learning_rate": 5.30265014699628e-07,
"loss": 0.4125,
"step": 900
},
{
"epoch": 1.85,
"grad_norm": 1.9654788970947266,
"learning_rate": 3.0141136285129825e-07,
"loss": 0.4115,
"step": 925
},
{
"epoch": 1.9,
"grad_norm": 1.8853169679641724,
"learning_rate": 1.361065400119399e-07,
"loss": 0.4122,
"step": 950
},
{
"epoch": 1.95,
"grad_norm": 1.8307050466537476,
"learning_rate": 3.543368603973529e-08,
"loss": 0.4112,
"step": 975
},
{
"epoch": 2.0,
"grad_norm": 1.647140622138977,
"learning_rate": 5.244763404133046e-11,
"loss": 0.4103,
"step": 1000
},
{
"epoch": 2.0,
"step": 1000,
"total_flos": 7576988418048000.0,
"train_loss": 0.4733153915405273,
"train_runtime": 560.9841,
"train_samples_per_second": 28.521,
"train_steps_per_second": 1.783
}
],
"logging_steps": 25,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7576988418048000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}