Llama3.2-3B_Paper_Impact_pa…/trainer_state.json

{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 144,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.034904013961605584,
      "grad_norm": 0.2724517285823822,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.0934,
      "step": 5
    },
    {
      "epoch": 0.06980802792321117,
      "grad_norm": 0.3414818048477173,
      "learning_rate": 1.2e-05,
      "loss": 0.0773,
      "step": 10
    },
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 0.07792749255895615,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.0711,
      "step": 15
    },
    {
      "epoch": 0.13961605584642234,
      "grad_norm": 0.0294520054012537,
      "learning_rate": 1.995259033893236e-05,
      "loss": 0.0736,
      "step": 20
    },
    {
      "epoch": 0.17452006980802792,
      "grad_norm": 0.013957683928310871,
      "learning_rate": 1.9760758775559275e-05,
      "loss": 0.0697,
      "step": 25
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 0.065118707716465,
      "learning_rate": 1.9424380828337146e-05,
      "loss": 0.0699,
      "step": 30
    },
    {
      "epoch": 0.2443280977312391,
      "grad_norm": 0.021100476384162903,
      "learning_rate": 1.894843789440892e-05,
      "loss": 0.0697,
      "step": 35
    },
    {
      "epoch": 0.2792321116928447,
      "grad_norm": 0.026198429986834526,
      "learning_rate": 1.833997817889878e-05,
      "loss": 0.0695,
      "step": 40
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 0.07283973693847656,
      "learning_rate": 1.760801231854278e-05,
      "loss": 0.07,
      "step": 45
    },
    {
      "epoch": 0.34904013961605584,
      "grad_norm": 0.04578598588705063,
      "learning_rate": 1.676337994380903e-05,
      "loss": 0.0701,
      "step": 50
    },
    {
      "epoch": 0.38394415357766143,
      "grad_norm": 0.10095158964395523,
      "learning_rate": 1.581858915557953e-05,
      "loss": 0.0698,
      "step": 55
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 0.028562646359205246,
      "learning_rate": 1.4787631293572094e-05,
      "loss": 0.0699,
      "step": 60
    },
    {
      "epoch": 0.4537521815008726,
      "grad_norm": 0.02697976492345333,
      "learning_rate": 1.368577373958362e-05,
      "loss": 0.0695,
      "step": 65
    },
    {
      "epoch": 0.4886561954624782,
      "grad_norm": 0.0685800239443779,
      "learning_rate": 1.2529333823916807e-05,
      "loss": 0.0696,
      "step": 70
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 0.13133621215820312,
      "learning_rate": 1.133543718319398e-05,
      "loss": 0.0713,
      "step": 75
    },
    {
      "epoch": 0.5584642233856894,
      "grad_norm": 0.017290577292442322,
      "learning_rate": 1.0121764148019977e-05,
      "loss": 0.0696,
      "step": 80
    },
    {
      "epoch": 0.5933682373472949,
      "grad_norm": 0.05858515202999115,
      "learning_rate": 8.906287916221259e-06,
      "loss": 0.0696,
      "step": 85
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 0.07648473978042603,
      "learning_rate": 7.707008389035102e-06,
      "loss": 0.0699,
      "step": 90
    },
    {
      "epoch": 0.6631762652705061,
      "grad_norm": 0.052451424300670624,
      "learning_rate": 6.5416856118498874e-06,
      "loss": 0.0697,
      "step": 95
    },
    {
      "epoch": 0.6980802792321117,
      "grad_norm": 0.03691520541906357,
      "learning_rate": 5.427576766953615e-06,
      "loss": 0.0697,
      "step": 100
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 0.003152969991788268,
      "learning_rate": 4.381180613146396e-06,
      "loss": 0.0695,
      "step": 105
    },
    {
      "epoch": 0.7678883071553229,
      "grad_norm": 0.017924955114722252,
      "learning_rate": 3.4179931567925216e-06,
      "loss": 0.0694,
      "step": 110
    },
    {
      "epoch": 0.8027923211169284,
      "grad_norm": 0.04167533293366432,
      "learning_rate": 2.5522781725621814e-06,
      "loss": 0.0694,
      "step": 115
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.03422262519598007,
      "learning_rate": 1.7968559722048906e-06,
      "loss": 0.0692,
      "step": 120
    },
    {
      "epoch": 0.8726003490401396,
      "grad_norm": 0.0365980863571167,
      "learning_rate": 1.1629135494628097e-06,
      "loss": 0.0696,
      "step": 125
    },
    {
      "epoch": 0.9075043630017452,
      "grad_norm": 0.032294586300849915,
      "learning_rate": 6.598389126745209e-07,
      "loss": 0.0695,
      "step": 130
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 0.001334571628831327,
      "learning_rate": 2.9508205842594727e-07,
      "loss": 0.0695,
      "step": 135
    },
    {
      "epoch": 0.9773123909249564,
      "grad_norm": 0.05335932970046997,
      "learning_rate": 7.404464507973608e-08,
      "loss": 0.0693,
      "step": 140
    },
    {
      "epoch": 1.0,
      "step": 144,
      "total_flos": 2.4545020729727386e+17,
      "train_loss": 0.07097241137590674,
      "train_runtime": 1113.1898,
      "train_samples_per_second": 16.457,
      "train_steps_per_second": 0.129
    }
  ],
  "logging_steps": 5,
  "max_steps": 144,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4545020729727386e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
初始化项目，由ModelHub XC社区提供模型 Model: FlyPig23/Llama3.2-3B_Paper_Impact_patent_SFT_1ep Source: Original Platform 2026-04-13 17:17:03 +08:00			`{`
			`"best_global_step": null,`
			`"best_metric": null,`
			`"best_model_checkpoint": null,`
			`"epoch": 1.0,`
			`"eval_steps": 500,`
			`"global_step": 144,`
			`"is_hyper_param_search": false,`
			`"is_local_process_zero": true,`
			`"is_world_process_zero": true,`
			`"log_history": [`
			`{`
			`"epoch": 0.034904013961605584,`
			`"grad_norm": 0.2724517285823822,`
			`"learning_rate": 5.333333333333334e-06,`
			`"loss": 0.0934,`
			`"step": 5`
			`},`
			`{`
			`"epoch": 0.06980802792321117,`
			`"grad_norm": 0.3414818048477173,`
			`"learning_rate": 1.2e-05,`
			`"loss": 0.0773,`
			`"step": 10`
			`},`
			`{`
			`"epoch": 0.10471204188481675,`
			`"grad_norm": 0.07792749255895615,`
			`"learning_rate": 1.866666666666667e-05,`
			`"loss": 0.0711,`
			`"step": 15`
			`},`
			`{`
			`"epoch": 0.13961605584642234,`
			`"grad_norm": 0.0294520054012537,`
			`"learning_rate": 1.995259033893236e-05,`
			`"loss": 0.0736,`
			`"step": 20`
			`},`
			`{`
			`"epoch": 0.17452006980802792,`
			`"grad_norm": 0.013957683928310871,`
			`"learning_rate": 1.9760758775559275e-05,`
			`"loss": 0.0697,`
			`"step": 25`
			`},`
			`{`
			`"epoch": 0.2094240837696335,`
			`"grad_norm": 0.065118707716465,`
			`"learning_rate": 1.9424380828337146e-05,`
			`"loss": 0.0699,`
			`"step": 30`
			`},`
			`{`
			`"epoch": 0.2443280977312391,`
			`"grad_norm": 0.021100476384162903,`
			`"learning_rate": 1.894843789440892e-05,`
			`"loss": 0.0697,`
			`"step": 35`
			`},`
			`{`
			`"epoch": 0.2792321116928447,`
			`"grad_norm": 0.026198429986834526,`
			`"learning_rate": 1.833997817889878e-05,`
			`"loss": 0.0695,`
			`"step": 40`
			`},`
			`{`
			`"epoch": 0.31413612565445026,`
			`"grad_norm": 0.07283973693847656,`
			`"learning_rate": 1.760801231854278e-05,`
			`"loss": 0.07,`
			`"step": 45`
			`},`
			`{`
			`"epoch": 0.34904013961605584,`
			`"grad_norm": 0.04578598588705063,`
			`"learning_rate": 1.676337994380903e-05,`
			`"loss": 0.0701,`
			`"step": 50`
			`},`
			`{`
			`"epoch": 0.38394415357766143,`
			`"grad_norm": 0.10095158964395523,`
			`"learning_rate": 1.581858915557953e-05,`
			`"loss": 0.0698,`
			`"step": 55`
			`},`
			`{`
			`"epoch": 0.418848167539267,`
			`"grad_norm": 0.028562646359205246,`
			`"learning_rate": 1.4787631293572094e-05,`
			`"loss": 0.0699,`
			`"step": 60`
			`},`
			`{`
			`"epoch": 0.4537521815008726,`
			`"grad_norm": 0.02697976492345333,`
			`"learning_rate": 1.368577373958362e-05,`
			`"loss": 0.0695,`
			`"step": 65`
			`},`
			`{`
			`"epoch": 0.4886561954624782,`
			`"grad_norm": 0.0685800239443779,`
			`"learning_rate": 1.2529333823916807e-05,`
			`"loss": 0.0696,`
			`"step": 70`
			`},`
			`{`
			`"epoch": 0.5235602094240838,`
			`"grad_norm": 0.13133621215820312,`
			`"learning_rate": 1.133543718319398e-05,`
			`"loss": 0.0713,`
			`"step": 75`
			`},`
			`{`
			`"epoch": 0.5584642233856894,`
			`"grad_norm": 0.017290577292442322,`
			`"learning_rate": 1.0121764148019977e-05,`
			`"loss": 0.0696,`
			`"step": 80`
			`},`
			`{`
			`"epoch": 0.5933682373472949,`
			`"grad_norm": 0.05858515202999115,`
			`"learning_rate": 8.906287916221259e-06,`
			`"loss": 0.0696,`
			`"step": 85`
			`},`
			`{`
			`"epoch": 0.6282722513089005,`
			`"grad_norm": 0.07648473978042603,`
			`"learning_rate": 7.707008389035102e-06,`
			`"loss": 0.0699,`
			`"step": 90`
			`},`
			`{`
			`"epoch": 0.6631762652705061,`
			`"grad_norm": 0.052451424300670624,`
			`"learning_rate": 6.5416856118498874e-06,`
			`"loss": 0.0697,`
			`"step": 95`
			`},`
			`{`
			`"epoch": 0.6980802792321117,`
			`"grad_norm": 0.03691520541906357,`
			`"learning_rate": 5.427576766953615e-06,`
			`"loss": 0.0697,`
			`"step": 100`
			`},`
			`{`
			`"epoch": 0.7329842931937173,`
			`"grad_norm": 0.003152969991788268,`
			`"learning_rate": 4.381180613146396e-06,`
			`"loss": 0.0695,`
			`"step": 105`
			`},`
			`{`
			`"epoch": 0.7678883071553229,`
			`"grad_norm": 0.017924955114722252,`
			`"learning_rate": 3.4179931567925216e-06,`
			`"loss": 0.0694,`
			`"step": 110`
			`},`
			`{`
			`"epoch": 0.8027923211169284,`
			`"grad_norm": 0.04167533293366432,`
			`"learning_rate": 2.5522781725621814e-06,`
			`"loss": 0.0694,`
			`"step": 115`
			`},`
			`{`
			`"epoch": 0.837696335078534,`
			`"grad_norm": 0.03422262519598007,`
			`"learning_rate": 1.7968559722048906e-06,`
			`"loss": 0.0692,`
			`"step": 120`
			`},`
			`{`
			`"epoch": 0.8726003490401396,`
			`"grad_norm": 0.0365980863571167,`
			`"learning_rate": 1.1629135494628097e-06,`
			`"loss": 0.0696,`
			`"step": 125`
			`},`
			`{`
			`"epoch": 0.9075043630017452,`
			`"grad_norm": 0.032294586300849915,`
			`"learning_rate": 6.598389126745209e-07,`
			`"loss": 0.0695,`
			`"step": 130`
			`},`
			`{`
			`"epoch": 0.9424083769633508,`
			`"grad_norm": 0.001334571628831327,`
			`"learning_rate": 2.9508205842594727e-07,`
			`"loss": 0.0695,`
			`"step": 135`
			`},`
			`{`
			`"epoch": 0.9773123909249564,`
			`"grad_norm": 0.05335932970046997,`
			`"learning_rate": 7.404464507973608e-08,`
			`"loss": 0.0693,`
			`"step": 140`
			`},`
			`{`
			`"epoch": 1.0,`
			`"step": 144,`
			`"total_flos": 2.4545020729727386e+17,`
			`"train_loss": 0.07097241137590674,`
			`"train_runtime": 1113.1898,`
			`"train_samples_per_second": 16.457,`
			`"train_steps_per_second": 0.129`
			`}`
			`],`
			`"logging_steps": 5,`
			`"max_steps": 144,`
			`"num_input_tokens_seen": 0,`
			`"num_train_epochs": 1,`
			`"save_steps": 500,`
			`"stateful_callbacks": {`
			`"TrainerControl": {`
			`"args": {`
			`"should_epoch_stop": false,`
			`"should_evaluate": false,`
			`"should_log": false,`
			`"should_save": true,`
			`"should_training_stop": true`
			`},`
			`"attributes": {}`
			`}`
			`},`
			`"total_flos": 2.4545020729727386e+17,`
			`"train_batch_size": 8,`
			`"trial_name": null,`
			`"trial_params": null`
			`}`