mistral-7b-base-sft-hh-harm…/trainer_state.json

{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998003992015968,
  "eval_steps": 100,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003992015968063872,
      "grad_norm": 71.5754165649414,
      "learning_rate": 0.0,
      "loss": 2.0095,
      "step": 1
    },
    {
      "epoch": 0.01996007984031936,
      "grad_norm": 25.808263778686523,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 1.8226,
      "step": 5
    },
    {
      "epoch": 0.03992015968063872,
      "grad_norm": 10.55435562133789,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 1.5338,
      "step": 10
    },
    {
      "epoch": 0.059880239520958084,
      "grad_norm": 13.148735046386719,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 1.4367,
      "step": 15
    },
    {
      "epoch": 0.07984031936127745,
      "grad_norm": 6.636435031890869,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 1.444,
      "step": 20
    },
    {
      "epoch": 0.0998003992015968,
      "grad_norm": 4.353740215301514,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 1.4407,
      "step": 25
    },
    {
      "epoch": 0.11976047904191617,
      "grad_norm": 3.963563919067383,
      "learning_rate": 1.9984407641819812e-05,
      "loss": 1.4644,
      "step": 30
    },
    {
      "epoch": 0.13972055888223553,
      "grad_norm": 4.042232036590576,
      "learning_rate": 1.9921147013144782e-05,
      "loss": 1.4582,
      "step": 35
    },
    {
      "epoch": 0.1596806387225549,
      "grad_norm": 3.6070656776428223,
      "learning_rate": 1.9809551553491918e-05,
      "loss": 1.461,
      "step": 40
    },
    {
      "epoch": 0.17964071856287425,
      "grad_norm": 3.843057632446289,
      "learning_rate": 1.9650164944723116e-05,
      "loss": 1.4496,
      "step": 45
    },
    {
      "epoch": 0.1996007984031936,
      "grad_norm": 3.784003734588623,
      "learning_rate": 1.944376370237481e-05,
      "loss": 1.4632,
      "step": 50
    },
    {
      "epoch": 0.21956087824351297,
      "grad_norm": 3.471970319747925,
      "learning_rate": 1.9191353392552346e-05,
      "loss": 1.4363,
      "step": 55
    },
    {
      "epoch": 0.23952095808383234,
      "grad_norm": 3.609161615371704,
      "learning_rate": 1.889416373291298e-05,
      "loss": 1.4209,
      "step": 60
    },
    {
      "epoch": 0.25948103792415167,
      "grad_norm": 3.706693649291992,
      "learning_rate": 1.855364260160507e-05,
      "loss": 1.3991,
      "step": 65
    },
    {
      "epoch": 0.27944111776447106,
      "grad_norm": 3.828991174697876,
      "learning_rate": 1.8171448983351284e-05,
      "loss": 1.4168,
      "step": 70
    },
    {
      "epoch": 0.2994011976047904,
      "grad_norm": 3.53777813911438,
      "learning_rate": 1.7749444887041797e-05,
      "loss": 1.4197,
      "step": 75
    },
    {
      "epoch": 0.3193612774451098,
      "grad_norm": 3.46360182762146,
      "learning_rate": 1.7289686274214116e-05,
      "loss": 1.4041,
      "step": 80
    },
    {
      "epoch": 0.3393213572854291,
      "grad_norm": 3.3420891761779785,
      "learning_rate": 1.6794413042615168e-05,
      "loss": 1.361,
      "step": 85
    },
    {
      "epoch": 0.3592814371257485,
      "grad_norm": 3.3036203384399414,
      "learning_rate": 1.6266038113644605e-05,
      "loss": 1.3671,
      "step": 90
    },
    {
      "epoch": 0.37924151696606784,
      "grad_norm": 3.4878897666931152,
      "learning_rate": 1.570713567684432e-05,
      "loss": 1.346,
      "step": 95
    },
    {
      "epoch": 0.3992015968063872,
      "grad_norm": 4.090396404266357,
      "learning_rate": 1.5120428648705716e-05,
      "loss": 1.3645,
      "step": 100
    },
    {
      "epoch": 0.3992015968063872,
      "eval_loss": 1.3725436925888062,
      "eval_runtime": 4.6422,
      "eval_samples_per_second": 194.519,
      "eval_steps_per_second": 6.247,
      "step": 100
    },
    {
      "epoch": 0.41916167664670656,
      "grad_norm": 3.2958004474639893,
      "learning_rate": 1.4508775406894308e-05,
      "loss": 1.3203,
      "step": 105
    },
    {
      "epoch": 0.43912175648702595,
      "grad_norm": 3.205641746520996,
      "learning_rate": 1.3875155864521031e-05,
      "loss": 1.3251,
      "step": 110
    },
    {
      "epoch": 0.4590818363273453,
      "grad_norm": 3.419351100921631,
      "learning_rate": 1.3222656952305113e-05,
      "loss": 1.3093,
      "step": 115
    },
    {
      "epoch": 0.47904191616766467,
      "grad_norm": 3.5063862800598145,
      "learning_rate": 1.2554457579357906e-05,
      "loss": 1.297,
      "step": 120
    },
    {
      "epoch": 0.499001996007984,
      "grad_norm": 3.2938807010650635,
      "learning_rate": 1.187381314585725e-05,
      "loss": 1.2889,
      "step": 125
    },
    {
      "epoch": 0.5189620758483033,
      "grad_norm": 3.2896780967712402,
      "learning_rate": 1.1184039683065014e-05,
      "loss": 1.2707,
      "step": 130
    },
    {
      "epoch": 0.5389221556886228,
      "grad_norm": 3.1759278774261475,
      "learning_rate": 1.0488497697956134e-05,
      "loss": 1.2518,
      "step": 135
    },
    {
      "epoch": 0.5588822355289421,
      "grad_norm": 3.616849422454834,
      "learning_rate": 9.790575801166432e-06,
      "loss": 1.2737,
      "step": 140
    },
    {
      "epoch": 0.5788423153692615,
      "grad_norm": 3.459834098815918,
      "learning_rate": 9.093674198022201e-06,
      "loss": 1.2496,
      "step": 145
    },
    {
      "epoch": 0.5988023952095808,
      "grad_norm": 3.072103261947632,
      "learning_rate": 8.401188123081653e-06,
      "loss": 1.2129,
      "step": 150
    },
    {
      "epoch": 0.6187624750499002,
      "grad_norm": 3.2528676986694336,
      "learning_rate": 7.716491298893443e-06,
      "loss": 1.2096,
      "step": 155
    },
    {
      "epoch": 0.6387225548902196,
      "grad_norm": 3.041900157928467,
      "learning_rate": 7.042919499559538e-06,
      "loss": 1.2171,
      "step": 160
    },
    {
      "epoch": 0.6586826347305389,
      "grad_norm": 3.830709457397461,
      "learning_rate": 6.383754299179079e-06,
      "loss": 1.2038,
      "step": 165
    },
    {
      "epoch": 0.6786427145708582,
      "grad_norm": 3.1818060874938965,
      "learning_rate": 5.742207084349274e-06,
      "loss": 1.1999,
      "step": 170
    },
    {
      "epoch": 0.6986027944111777,
      "grad_norm": 3.237358331680298,
      "learning_rate": 5.121403408612672e-06,
      "loss": 1.1821,
      "step": 175
    },
    {
      "epoch": 0.718562874251497,
      "grad_norm": 3.207139015197754,
      "learning_rate": 4.524367765074499e-06,
      "loss": 1.1617,
      "step": 180
    },
    {
      "epoch": 0.7385229540918163,
      "grad_norm": 3.0992743968963623,
      "learning_rate": 3.954008851376252e-06,
      "loss": 1.1629,
      "step": 185
    },
    {
      "epoch": 0.7584830339321357,
      "grad_norm": 3.1126255989074707,
      "learning_rate": 3.4131053988131947e-06,
      "loss": 1.1688,
      "step": 190
    },
    {
      "epoch": 0.7784431137724551,
      "grad_norm": 3.3172667026519775,
      "learning_rate": 2.9042926346347932e-06,
      "loss": 1.1507,
      "step": 195
    },
    {
      "epoch": 0.7984031936127745,
      "grad_norm": 3.125807762145996,
      "learning_rate": 2.4300494434824373e-06,
      "loss": 1.1459,
      "step": 200
    },
    {
      "epoch": 0.7984031936127745,
      "eval_loss": 1.1677805185317993,
      "eval_runtime": 4.6292,
      "eval_samples_per_second": 195.067,
      "eval_steps_per_second": 6.265,
      "step": 200
    },
    {
      "epoch": 0.8183632734530938,
      "grad_norm": 3.1806719303131104,
      "learning_rate": 1.9926862905126663e-06,
      "loss": 1.1508,
      "step": 205
    },
    {
      "epoch": 0.8383233532934131,
      "grad_norm": 3.2433359622955322,
      "learning_rate": 1.5943339650431578e-06,
      "loss": 1.1156,
      "step": 210
    },
    {
      "epoch": 0.8582834331337326,
      "grad_norm": 3.1037845611572266,
      "learning_rate": 1.2369331995613664e-06,
      "loss": 1.1278,
      "step": 215
    },
    {
      "epoch": 0.8782435129740519,
      "grad_norm": 3.121793270111084,
      "learning_rate": 9.222252146709143e-07,
      "loss": 1.1291,
      "step": 220
    },
    {
      "epoch": 0.8982035928143712,
      "grad_norm": 3.311478614807129,
      "learning_rate": 6.517432360398556e-07,
      "loss": 1.1606,
      "step": 225
    },
    {
      "epoch": 0.9181636726546906,
      "grad_norm": 3.1572906970977783,
      "learning_rate": 4.268050246793276e-07,
      "loss": 1.1376,
      "step": 230
    },
    {
      "epoch": 0.93812375249501,
      "grad_norm": 3.125819683074951,
      "learning_rate": 2.4850645694436736e-07,
      "loss": 1.1042,
      "step": 235
    },
    {
      "epoch": 0.9580838323353293,
      "grad_norm": 3.240495443344116,
      "learning_rate": 1.1771618553447217e-07,
      "loss": 1.1349,
      "step": 240
    },
    {
      "epoch": 0.9780439121756487,
      "grad_norm": 3.0710411071777344,
      "learning_rate": 3.50714075049563e-08,
      "loss": 1.1139,
      "step": 245
    },
    {
      "epoch": 0.998003992015968,
      "grad_norm": 3.2199409008026123,
      "learning_rate": 9.74759906957612e-10,
      "loss": 1.1324,
      "step": 250
    },
    {
      "epoch": 0.998003992015968,
      "step": 250,
      "total_flos": 8.741444925364634e+16,
      "train_loss": 1.2971151485443115,
      "train_runtime": 891.5874,
      "train_samples_per_second": 17.966,
      "train_steps_per_second": 0.28
    }
  ],
  "logging_steps": 5,
  "max_steps": 250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.741444925364634e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
初始化项目，由ModelHub XC社区提供模型 Model: W-61/mistral-7b-base-sft-hh-harmless-4xh200-batch-64 Source: Original Platform 2026-04-22 11:22:52 +08:00			`{`
			`"best_global_step": null,`
			`"best_metric": null,`
			`"best_model_checkpoint": null,`
			`"epoch": 0.998003992015968,`
			`"eval_steps": 100,`
			`"global_step": 250,`
			`"is_hyper_param_search": false,`
			`"is_local_process_zero": true,`
			`"is_world_process_zero": true,`
			`"log_history": [`
			`{`
			`"epoch": 0.003992015968063872,`
			`"grad_norm": 71.5754165649414,`
			`"learning_rate": 0.0,`
			`"loss": 2.0095,`
			`"step": 1`
			`},`
			`{`
			`"epoch": 0.01996007984031936,`
			`"grad_norm": 25.808263778686523,`
			`"learning_rate": 3.2000000000000003e-06,`
			`"loss": 1.8226,`
			`"step": 5`
			`},`
			`{`
			`"epoch": 0.03992015968063872,`
			`"grad_norm": 10.55435562133789,`
			`"learning_rate": 7.2000000000000005e-06,`
			`"loss": 1.5338,`
			`"step": 10`
			`},`
			`{`
			`"epoch": 0.059880239520958084,`
			`"grad_norm": 13.148735046386719,`
			`"learning_rate": 1.1200000000000001e-05,`
			`"loss": 1.4367,`
			`"step": 15`
			`},`
			`{`
			`"epoch": 0.07984031936127745,`
			`"grad_norm": 6.636435031890869,`
			`"learning_rate": 1.5200000000000002e-05,`
			`"loss": 1.444,`
			`"step": 20`
			`},`
			`{`
			`"epoch": 0.0998003992015968,`
			`"grad_norm": 4.353740215301514,`
			`"learning_rate": 1.9200000000000003e-05,`
			`"loss": 1.4407,`
			`"step": 25`
			`},`
			`{`
			`"epoch": 0.11976047904191617,`
			`"grad_norm": 3.963563919067383,`
			`"learning_rate": 1.9984407641819812e-05,`
			`"loss": 1.4644,`
			`"step": 30`
			`},`
			`{`
			`"epoch": 0.13972055888223553,`
			`"grad_norm": 4.042232036590576,`
			`"learning_rate": 1.9921147013144782e-05,`
			`"loss": 1.4582,`
			`"step": 35`
			`},`
			`{`
			`"epoch": 0.1596806387225549,`
			`"grad_norm": 3.6070656776428223,`
			`"learning_rate": 1.9809551553491918e-05,`
			`"loss": 1.461,`
			`"step": 40`
			`},`
			`{`
			`"epoch": 0.17964071856287425,`
			`"grad_norm": 3.843057632446289,`
			`"learning_rate": 1.9650164944723116e-05,`
			`"loss": 1.4496,`
			`"step": 45`
			`},`
			`{`
			`"epoch": 0.1996007984031936,`
			`"grad_norm": 3.784003734588623,`
			`"learning_rate": 1.944376370237481e-05,`
			`"loss": 1.4632,`
			`"step": 50`
			`},`
			`{`
			`"epoch": 0.21956087824351297,`
			`"grad_norm": 3.471970319747925,`
			`"learning_rate": 1.9191353392552346e-05,`
			`"loss": 1.4363,`
			`"step": 55`
			`},`
			`{`
			`"epoch": 0.23952095808383234,`
			`"grad_norm": 3.609161615371704,`
			`"learning_rate": 1.889416373291298e-05,`
			`"loss": 1.4209,`
			`"step": 60`
			`},`
			`{`
			`"epoch": 0.25948103792415167,`
			`"grad_norm": 3.706693649291992,`
			`"learning_rate": 1.855364260160507e-05,`
			`"loss": 1.3991,`
			`"step": 65`
			`},`
			`{`
			`"epoch": 0.27944111776447106,`
			`"grad_norm": 3.828991174697876,`
			`"learning_rate": 1.8171448983351284e-05,`
			`"loss": 1.4168,`
			`"step": 70`
			`},`
			`{`
			`"epoch": 0.2994011976047904,`
			`"grad_norm": 3.53777813911438,`
			`"learning_rate": 1.7749444887041797e-05,`
			`"loss": 1.4197,`
			`"step": 75`
			`},`
			`{`
			`"epoch": 0.3193612774451098,`
			`"grad_norm": 3.46360182762146,`
			`"learning_rate": 1.7289686274214116e-05,`
			`"loss": 1.4041,`
			`"step": 80`
			`},`
			`{`
			`"epoch": 0.3393213572854291,`
			`"grad_norm": 3.3420891761779785,`
			`"learning_rate": 1.6794413042615168e-05,`
			`"loss": 1.361,`
			`"step": 85`
			`},`
			`{`
			`"epoch": 0.3592814371257485,`
			`"grad_norm": 3.3036203384399414,`
			`"learning_rate": 1.6266038113644605e-05,`
			`"loss": 1.3671,`
			`"step": 90`
			`},`
			`{`
			`"epoch": 0.37924151696606784,`
			`"grad_norm": 3.4878897666931152,`
			`"learning_rate": 1.570713567684432e-05,`
			`"loss": 1.346,`
			`"step": 95`
			`},`
			`{`
			`"epoch": 0.3992015968063872,`
			`"grad_norm": 4.090396404266357,`
			`"learning_rate": 1.5120428648705716e-05,`
			`"loss": 1.3645,`
			`"step": 100`
			`},`
			`{`
			`"epoch": 0.3992015968063872,`
			`"eval_loss": 1.3725436925888062,`
			`"eval_runtime": 4.6422,`
			`"eval_samples_per_second": 194.519,`
			`"eval_steps_per_second": 6.247,`
			`"step": 100`
			`},`
			`{`
			`"epoch": 0.41916167664670656,`
			`"grad_norm": 3.2958004474639893,`
			`"learning_rate": 1.4508775406894308e-05,`
			`"loss": 1.3203,`
			`"step": 105`
			`},`
			`{`
			`"epoch": 0.43912175648702595,`
			`"grad_norm": 3.205641746520996,`
			`"learning_rate": 1.3875155864521031e-05,`
			`"loss": 1.3251,`
			`"step": 110`
			`},`
			`{`
			`"epoch": 0.4590818363273453,`
			`"grad_norm": 3.419351100921631,`
			`"learning_rate": 1.3222656952305113e-05,`
			`"loss": 1.3093,`
			`"step": 115`
			`},`
			`{`
			`"epoch": 0.47904191616766467,`
			`"grad_norm": 3.5063862800598145,`
			`"learning_rate": 1.2554457579357906e-05,`
			`"loss": 1.297,`
			`"step": 120`
			`},`
			`{`
			`"epoch": 0.499001996007984,`
			`"grad_norm": 3.2938807010650635,`
			`"learning_rate": 1.187381314585725e-05,`
			`"loss": 1.2889,`
			`"step": 125`
			`},`
			`{`
			`"epoch": 0.5189620758483033,`
			`"grad_norm": 3.2896780967712402,`
			`"learning_rate": 1.1184039683065014e-05,`
			`"loss": 1.2707,`
			`"step": 130`
			`},`
			`{`
			`"epoch": 0.5389221556886228,`
			`"grad_norm": 3.1759278774261475,`
			`"learning_rate": 1.0488497697956134e-05,`
			`"loss": 1.2518,`
			`"step": 135`
			`},`
			`{`
			`"epoch": 0.5588822355289421,`
			`"grad_norm": 3.616849422454834,`
			`"learning_rate": 9.790575801166432e-06,`
			`"loss": 1.2737,`
			`"step": 140`
			`},`
			`{`
			`"epoch": 0.5788423153692615,`
			`"grad_norm": 3.459834098815918,`
			`"learning_rate": 9.093674198022201e-06,`
			`"loss": 1.2496,`
			`"step": 145`
			`},`
			`{`
			`"epoch": 0.5988023952095808,`
			`"grad_norm": 3.072103261947632,`
			`"learning_rate": 8.401188123081653e-06,`
			`"loss": 1.2129,`
			`"step": 150`
			`},`
			`{`
			`"epoch": 0.6187624750499002,`
			`"grad_norm": 3.2528676986694336,`
			`"learning_rate": 7.716491298893443e-06,`
			`"loss": 1.2096,`
			`"step": 155`
			`},`
			`{`
			`"epoch": 0.6387225548902196,`
			`"grad_norm": 3.041900157928467,`
			`"learning_rate": 7.042919499559538e-06,`
			`"loss": 1.2171,`
			`"step": 160`
			`},`
			`{`
			`"epoch": 0.6586826347305389,`
			`"grad_norm": 3.830709457397461,`
			`"learning_rate": 6.383754299179079e-06,`
			`"loss": 1.2038,`
			`"step": 165`
			`},`
			`{`
			`"epoch": 0.6786427145708582,`
			`"grad_norm": 3.1818060874938965,`
			`"learning_rate": 5.742207084349274e-06,`
			`"loss": 1.1999,`
			`"step": 170`
			`},`
			`{`
			`"epoch": 0.6986027944111777,`
			`"grad_norm": 3.237358331680298,`
			`"learning_rate": 5.121403408612672e-06,`
			`"loss": 1.1821,`
			`"step": 175`
			`},`
			`{`
			`"epoch": 0.718562874251497,`
			`"grad_norm": 3.207139015197754,`
			`"learning_rate": 4.524367765074499e-06,`
			`"loss": 1.1617,`
			`"step": 180`
			`},`
			`{`
			`"epoch": 0.7385229540918163,`
			`"grad_norm": 3.0992743968963623,`
			`"learning_rate": 3.954008851376252e-06,`
			`"loss": 1.1629,`
			`"step": 185`
			`},`
			`{`
			`"epoch": 0.7584830339321357,`
			`"grad_norm": 3.1126255989074707,`
			`"learning_rate": 3.4131053988131947e-06,`
			`"loss": 1.1688,`
			`"step": 190`
			`},`
			`{`
			`"epoch": 0.7784431137724551,`
			`"grad_norm": 3.3172667026519775,`
			`"learning_rate": 2.9042926346347932e-06,`
			`"loss": 1.1507,`
			`"step": 195`
			`},`
			`{`
			`"epoch": 0.7984031936127745,`
			`"grad_norm": 3.125807762145996,`
			`"learning_rate": 2.4300494434824373e-06,`
			`"loss": 1.1459,`
			`"step": 200`
			`},`
			`{`
			`"epoch": 0.7984031936127745,`
			`"eval_loss": 1.1677805185317993,`
			`"eval_runtime": 4.6292,`
			`"eval_samples_per_second": 195.067,`
			`"eval_steps_per_second": 6.265,`
			`"step": 200`
			`},`
			`{`
			`"epoch": 0.8183632734530938,`
			`"grad_norm": 3.1806719303131104,`
			`"learning_rate": 1.9926862905126663e-06,`
			`"loss": 1.1508,`
			`"step": 205`
			`},`
			`{`
			`"epoch": 0.8383233532934131,`
			`"grad_norm": 3.2433359622955322,`
			`"learning_rate": 1.5943339650431578e-06,`
			`"loss": 1.1156,`
			`"step": 210`
			`},`
			`{`
			`"epoch": 0.8582834331337326,`
			`"grad_norm": 3.1037845611572266,`
			`"learning_rate": 1.2369331995613664e-06,`
			`"loss": 1.1278,`
			`"step": 215`
			`},`
			`{`
			`"epoch": 0.8782435129740519,`
			`"grad_norm": 3.121793270111084,`
			`"learning_rate": 9.222252146709143e-07,`
			`"loss": 1.1291,`
			`"step": 220`
			`},`
			`{`
			`"epoch": 0.8982035928143712,`
			`"grad_norm": 3.311478614807129,`
			`"learning_rate": 6.517432360398556e-07,`
			`"loss": 1.1606,`
			`"step": 225`
			`},`
			`{`
			`"epoch": 0.9181636726546906,`
			`"grad_norm": 3.1572906970977783,`
			`"learning_rate": 4.268050246793276e-07,`
			`"loss": 1.1376,`
			`"step": 230`
			`},`
			`{`
			`"epoch": 0.93812375249501,`
			`"grad_norm": 3.125819683074951,`
			`"learning_rate": 2.4850645694436736e-07,`
			`"loss": 1.1042,`
			`"step": 235`
			`},`
			`{`
			`"epoch": 0.9580838323353293,`
			`"grad_norm": 3.240495443344116,`
			`"learning_rate": 1.1771618553447217e-07,`
			`"loss": 1.1349,`
			`"step": 240`
			`},`
			`{`
			`"epoch": 0.9780439121756487,`
			`"grad_norm": 3.0710411071777344,`
			`"learning_rate": 3.50714075049563e-08,`
			`"loss": 1.1139,`
			`"step": 245`
			`},`
			`{`
			`"epoch": 0.998003992015968,`
			`"grad_norm": 3.2199409008026123,`
			`"learning_rate": 9.74759906957612e-10,`
			`"loss": 1.1324,`
			`"step": 250`
			`},`
			`{`
			`"epoch": 0.998003992015968,`
			`"step": 250,`
			`"total_flos": 8.741444925364634e+16,`
			`"train_loss": 1.2971151485443115,`
			`"train_runtime": 891.5874,`
			`"train_samples_per_second": 17.966,`
			`"train_steps_per_second": 0.28`
			`}`
			`],`
			`"logging_steps": 5,`
			`"max_steps": 250,`
			`"num_input_tokens_seen": 0,`
			`"num_train_epochs": 1,`
			`"save_steps": 200,`
			`"stateful_callbacks": {`
			`"TrainerControl": {`
			`"args": {`
			`"should_epoch_stop": false,`
			`"should_evaluate": false,`
			`"should_log": false,`
			`"should_save": true,`
			`"should_training_stop": true`
			`},`
			`"attributes": {}`
			`}`
			`},`
			`"total_flos": 8.741444925364634e+16,`
			`"train_batch_size": 8,`
			`"trial_name": null,`
			`"trial_params": null`
			`}`