grpo-tool-sat-sft-qwen3-1p7…/trainer_state.json

{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 11.879703521728516,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 2.3428,
      "step": 25
    },
    {
      "epoch": 0.1,
      "grad_norm": 5.421560287475586,
      "learning_rate": 1.998107236150145e-05,
      "loss": 0.7368,
      "step": 50
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.313567876815796,
      "learning_rate": 1.989863301061654e-05,
      "loss": 0.5067,
      "step": 75
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5316965579986572,
      "learning_rate": 1.9751334064160708e-05,
      "loss": 0.4279,
      "step": 100
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5976717472076416,
      "learning_rate": 1.9540140680664915e-05,
      "loss": 0.419,
      "step": 125
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.39866304397583,
      "learning_rate": 1.9266436679230866e-05,
      "loss": 0.4207,
      "step": 150
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8767157793045044,
      "learning_rate": 1.8932015472223692e-05,
      "loss": 0.4169,
      "step": 175
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.209038734436035,
      "learning_rate": 1.8539068314154355e-05,
      "loss": 0.4185,
      "step": 200
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1389389038085938,
      "learning_rate": 1.8090169943749477e-05,
      "loss": 0.4197,
      "step": 225
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.957894206047058,
      "learning_rate": 1.758826171328727e-05,
      "loss": 0.42,
      "step": 250
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7354559898376465,
      "learning_rate": 1.7036632315742464e-05,
      "loss": 0.4209,
      "step": 275
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4375723600387573,
      "learning_rate": 1.6438896236023374e-05,
      "loss": 0.4168,
      "step": 300
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7558571100234985,
      "learning_rate": 1.57989700674967e-05,
      "loss": 0.4155,
      "step": 325
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6743788719177246,
      "learning_rate": 1.512104684898319e-05,
      "loss": 0.4171,
      "step": 350
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.1145944595336914,
      "learning_rate": 1.4409568590377918e-05,
      "loss": 0.4143,
      "step": 375
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5990407466888428,
      "learning_rate": 1.3669197166917723e-05,
      "loss": 0.4144,
      "step": 400
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.7390551567077637,
      "learning_rate": 1.2904783772807534e-05,
      "loss": 0.4152,
      "step": 425
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1414133310317993,
      "learning_rate": 1.2121337134357121e-05,
      "loss": 0.4161,
      "step": 450
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2553099393844604,
      "learning_rate": 1.1323990690907734e-05,
      "loss": 0.4154,
      "step": 475
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7206685543060303,
      "learning_rate": 1.0517968958591705e-05,
      "loss": 0.4142,
      "step": 500
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.1427602767944336,
      "learning_rate": 9.708553297322407e-06,
      "loss": 0.4138,
      "step": 525
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4284802675247192,
      "learning_rate": 8.901047305322172e-06,
      "loss": 0.4142,
      "step": 550
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6178677082061768,
      "learning_rate": 8.100742067936432e-06,
      "loss": 0.4138,
      "step": 575
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.2968213558197021,
      "learning_rate": 7.312881488436928e-06,
      "loss": 0.4125,
      "step": 600
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.2272377014160156,
      "learning_rate": 6.542627927979772e-06,
      "loss": 0.4108,
      "step": 625
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5362012386322021,
      "learning_rate": 5.795028379858355e-06,
      "loss": 0.4112,
      "step": 650
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.4780094623565674,
      "learning_rate": 5.074981399690219e-06,
      "loss": 0.413,
      "step": 675
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2684173583984375,
      "learning_rate": 4.3872050082238535e-06,
      "loss": 0.4136,
      "step": 700
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7533961534500122,
      "learning_rate": 3.736205777078381e-06,
      "loss": 0.4115,
      "step": 725
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0084352493286133,
      "learning_rate": 3.126249299978086e-06,
      "loss": 0.4125,
      "step": 750
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6931928396224976,
      "learning_rate": 2.5613322429654573e-06,
      "loss": 0.4122,
      "step": 775
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7414414882659912,
      "learning_rate": 2.0451561567303378e-06,
      "loss": 0.412,
      "step": 800
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7203795909881592,
      "learning_rate": 1.5811032226467304e-06,
      "loss": 0.4123,
      "step": 825
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.252686023712158,
      "learning_rate": 1.1722140914384162e-06,
      "loss": 0.4106,
      "step": 850
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8625959157943726,
      "learning_rate": 8.211679596828481e-07,
      "loss": 0.4118,
      "step": 875
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.6265811920166016,
      "learning_rate": 5.30265014699628e-07,
      "loss": 0.4125,
      "step": 900
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9654788970947266,
      "learning_rate": 3.0141136285129825e-07,
      "loss": 0.4115,
      "step": 925
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.8853169679641724,
      "learning_rate": 1.361065400119399e-07,
      "loss": 0.4122,
      "step": 950
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.8307050466537476,
      "learning_rate": 3.543368603973529e-08,
      "loss": 0.4112,
      "step": 975
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.647140622138977,
      "learning_rate": 5.244763404133046e-11,
      "loss": 0.4103,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "step": 1000,
      "total_flos": 7576988418048000.0,
      "train_loss": 0.4733153915405273,
      "train_runtime": 560.9841,
      "train_samples_per_second": 28.521,
      "train_steps_per_second": 1.783
    }
  ],
  "logging_steps": 25,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7576988418048000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}