Files
qwen2.5vl-3b-sampled_2500_q…/trainer_state.json
ModelHub XC 5518cc1f4f 初始化项目,由ModelHub XC社区提供模型
Model: waltonfuture/qwen2.5vl-3b-sampled_2500_qwen2.5vl32b
Source: Original Platform
2026-05-22 02:30:13 +08:00

319 lines
8.9 KiB
JSON

{
"best_global_step": 40,
"best_metric": 0.29921636,
"best_model_checkpoint": "/data/home/scyb089/CODE/scripts/ms-swift/3b-new/v24-20250507-100951/checkpoint-40",
"epoch": 2.9305331179321485,
"eval_steps": 20,
"global_step": 114,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.025848142164781908,
"grad_norm": 2.496293067932129,
"learning_rate": 9.998101535124758e-06,
"loss": 0.3740294575691223,
"memory(GiB)": 27.73,
"step": 1,
"token_acc": 0.8633032214078745,
"train_speed(iter/s)": 0.068625
},
{
"epoch": 0.12924071082390953,
"grad_norm": 1.4705803394317627,
"learning_rate": 9.952610423187516e-06,
"loss": 0.3252382278442383,
"memory(GiB)": 29.52,
"step": 5,
"token_acc": 0.8930616812586399,
"train_speed(iter/s)": 0.123788
},
{
"epoch": 0.25848142164781907,
"grad_norm": 1.208221435546875,
"learning_rate": 9.811340001546252e-06,
"loss": 0.31452901363372804,
"memory(GiB)": 31.84,
"step": 10,
"token_acc": 0.9054922186029678,
"train_speed(iter/s)": 0.138172
},
{
"epoch": 0.3877221324717286,
"grad_norm": 0.9632206559181213,
"learning_rate": 9.578866633275289e-06,
"loss": 0.27907500267028806,
"memory(GiB)": 31.84,
"step": 15,
"token_acc": 0.9115719379194631,
"train_speed(iter/s)": 0.141744
},
{
"epoch": 0.5169628432956381,
"grad_norm": 0.8365996479988098,
"learning_rate": 9.259597044191635e-06,
"loss": 0.27438764572143554,
"memory(GiB)": 31.84,
"step": 20,
"token_acc": 0.9132246566726072,
"train_speed(iter/s)": 0.144692
},
{
"epoch": 0.5169628432956381,
"eval_loss": 0.3188421130180359,
"eval_runtime": 1.163,
"eval_samples_per_second": 21.496,
"eval_steps_per_second": 6.019,
"eval_token_acc": 0.9051328304362086,
"step": 20
},
{
"epoch": 0.6462035541195477,
"grad_norm": 0.8677796125411987,
"learning_rate": 8.859583254581604e-06,
"loss": 0.26254222393035886,
"memory(GiB)": 31.84,
"step": 25,
"token_acc": 0.912092040385067,
"train_speed(iter/s)": 0.137482
},
{
"epoch": 0.7754442649434572,
"grad_norm": 0.7865857481956482,
"learning_rate": 8.386407858128707e-06,
"loss": 0.2681217908859253,
"memory(GiB)": 31.85,
"step": 30,
"token_acc": 0.8996108553551899,
"train_speed(iter/s)": 0.140721
},
{
"epoch": 0.9046849757673667,
"grad_norm": 0.7490976452827454,
"learning_rate": 7.849040287551331e-06,
"loss": 0.25801796913146974,
"memory(GiB)": 31.85,
"step": 35,
"token_acc": 0.9076438224453822,
"train_speed(iter/s)": 0.14257
},
{
"epoch": 1.0258481421647818,
"grad_norm": 1.092894434928894,
"learning_rate": 7.257666791554448e-06,
"loss": 0.27177181243896487,
"memory(GiB)": 31.85,
"step": 40,
"token_acc": 0.9122958797447821,
"train_speed(iter/s)": 0.145575
},
{
"epoch": 1.0258481421647818,
"eval_loss": 0.2992163598537445,
"eval_runtime": 1.1551,
"eval_samples_per_second": 21.643,
"eval_steps_per_second": 6.06,
"eval_token_acc": 0.9095605116431617,
"step": 40
},
{
"epoch": 1.1550888529886914,
"grad_norm": 0.7573268413543701,
"learning_rate": 6.6234973460234184e-06,
"loss": 0.19469616413116456,
"memory(GiB)": 31.85,
"step": 45,
"token_acc": 0.9224711780868482,
"train_speed(iter/s)": 0.140248
},
{
"epoch": 1.284329563812601,
"grad_norm": 0.8084748387336731,
"learning_rate": 5.958553159618693e-06,
"loss": 0.1806863307952881,
"memory(GiB)": 31.85,
"step": 50,
"token_acc": 0.9398337785693084,
"train_speed(iter/s)": 0.141394
},
{
"epoch": 1.4135702746365104,
"grad_norm": 0.867699146270752,
"learning_rate": 5.275438801779328e-06,
"loss": 0.17451841831207277,
"memory(GiB)": 31.85,
"step": 55,
"token_acc": 0.9418562744768266,
"train_speed(iter/s)": 0.142869
},
{
"epoch": 1.5428109854604202,
"grad_norm": 0.663566529750824,
"learning_rate": 4.587103272638339e-06,
"loss": 0.17188454866409303,
"memory(GiB)": 31.85,
"step": 60,
"token_acc": 0.9462987886944818,
"train_speed(iter/s)": 0.144513
},
{
"epoch": 1.5428109854604202,
"eval_loss": 0.30524736642837524,
"eval_runtime": 1.157,
"eval_samples_per_second": 21.607,
"eval_steps_per_second": 6.05,
"eval_token_acc": 0.9102164644145622,
"step": 60
},
{
"epoch": 1.6720516962843295,
"grad_norm": 0.7321382761001587,
"learning_rate": 3.906594543968122e-06,
"loss": 0.17072482109069825,
"memory(GiB)": 31.85,
"step": 65,
"token_acc": 0.9364743108441489,
"train_speed(iter/s)": 0.141054
},
{
"epoch": 1.8012924071082392,
"grad_norm": 0.7696079015731812,
"learning_rate": 3.2468122240362287e-06,
"loss": 0.1751842737197876,
"memory(GiB)": 31.85,
"step": 70,
"token_acc": 0.9407936548287872,
"train_speed(iter/s)": 0.142529
},
{
"epoch": 1.9305331179321485,
"grad_norm": 0.7385942339897156,
"learning_rate": 2.6202630348146323e-06,
"loss": 0.16539106369018555,
"memory(GiB)": 31.85,
"step": 75,
"token_acc": 0.9483229542226592,
"train_speed(iter/s)": 0.143018
},
{
"epoch": 2.0516962843295636,
"grad_norm": 0.7131247520446777,
"learning_rate": 2.0388237366751005e-06,
"loss": 0.1537123441696167,
"memory(GiB)": 31.85,
"step": 80,
"token_acc": 0.9502986451706891,
"train_speed(iter/s)": 0.14475
},
{
"epoch": 2.0516962843295636,
"eval_loss": 0.30163297057151794,
"eval_runtime": 1.1613,
"eval_samples_per_second": 21.528,
"eval_steps_per_second": 6.028,
"eval_token_acc": 0.9122663168251886,
"step": 80
},
{
"epoch": 2.1809369951534734,
"grad_norm": 0.6918891668319702,
"learning_rate": 1.5135159945300232e-06,
"loss": 0.1329301118850708,
"memory(GiB)": 31.85,
"step": 85,
"token_acc": 0.9482836196172882,
"train_speed(iter/s)": 0.142599
},
{
"epoch": 2.3101777059773827,
"grad_norm": 0.6953617930412292,
"learning_rate": 1.0542974530180327e-06,
"loss": 0.13516383171081542,
"memory(GiB)": 31.85,
"step": 90,
"token_acc": 0.95539407490218,
"train_speed(iter/s)": 0.143367
},
{
"epoch": 2.4394184168012925,
"grad_norm": 0.6941922307014465,
"learning_rate": 6.698729810778065e-07,
"loss": 0.12188678979873657,
"memory(GiB)": 31.85,
"step": 95,
"token_acc": 0.9622745490981964,
"train_speed(iter/s)": 0.143797
},
{
"epoch": 2.568659127625202,
"grad_norm": 0.6733763217926025,
"learning_rate": 3.675296639259912e-07,
"loss": 0.12675585746765136,
"memory(GiB)": 31.85,
"step": 100,
"token_acc": 0.9584026622296173,
"train_speed(iter/s)": 0.144021
},
{
"epoch": 2.568659127625202,
"eval_loss": 0.31649884581565857,
"eval_runtime": 1.1563,
"eval_samples_per_second": 21.621,
"eval_steps_per_second": 6.054,
"eval_token_acc": 0.9116923581502132,
"step": 100
},
{
"epoch": 2.6978998384491115,
"grad_norm": 0.6838268637657166,
"learning_rate": 1.5299867030334815e-07,
"loss": 0.11652226448059082,
"memory(GiB)": 31.85,
"step": 105,
"token_acc": 0.9495417438773978,
"train_speed(iter/s)": 0.142047
},
{
"epoch": 2.827140549273021,
"grad_norm": 0.7604183554649353,
"learning_rate": 3.034661341025258e-08,
"loss": 0.12787914276123047,
"memory(GiB)": 31.85,
"step": 110,
"token_acc": 0.9542654419448875,
"train_speed(iter/s)": 0.142691
},
{
"epoch": 2.9305331179321485,
"eval_loss": 0.31783103942871094,
"eval_runtime": 1.1534,
"eval_samples_per_second": 21.674,
"eval_steps_per_second": 6.069,
"eval_token_acc": 0.9116103640537881,
"step": 114
}
],
"logging_steps": 5,
"max_steps": 114,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3995644452601856e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}