Files
Crawl-Qwen2.5-3B-coder/trainer_state.json
ModelHub XC f52362a48a 初始化项目,由ModelHub XC社区提供模型
Model: rqwfwef/Crawl-Qwen2.5-3B-coder
Source: Original Platform
2026-06-18 12:14:13 +08:00

281 lines
7.6 KiB
JSON

{
"best_metric": 1.06145525,
"best_model_checkpoint": "/data/coding/ms-swift/output/v7-20250220-132503/checkpoint-108",
"epoch": 1.9829351535836177,
"eval_steps": 50,
"global_step": 108,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01820250284414107,
"grad_norm": 6.1875,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.5638728141784668,
"memory(GiB)": 29.73,
"step": 1,
"token_acc": 0.6939769707705934,
"train_speed(iter/s)": 0.129066
},
{
"epoch": 0.09101251422070535,
"grad_norm": 4.96875,
"learning_rate": 8.333333333333334e-06,
"loss": 1.518846035003662,
"memory(GiB)": 41.66,
"step": 5,
"token_acc": 0.7012233049968899,
"train_speed(iter/s)": 0.226379
},
{
"epoch": 0.1820250284414107,
"grad_norm": 3.515625,
"learning_rate": 9.96210254835968e-06,
"loss": 1.3327471733093261,
"memory(GiB)": 41.66,
"step": 10,
"token_acc": 0.7161527878935017,
"train_speed(iter/s)": 0.240583
},
{
"epoch": 0.27303754266211605,
"grad_norm": 2.296875,
"learning_rate": 9.809128215864096e-06,
"loss": 1.1619236946105957,
"memory(GiB)": 45.97,
"step": 15,
"token_acc": 0.7329187688216199,
"train_speed(iter/s)": 0.245364
},
{
"epoch": 0.3640500568828214,
"grad_norm": 2.296875,
"learning_rate": 9.542326359097619e-06,
"loss": 1.1952880859375,
"memory(GiB)": 45.97,
"step": 20,
"token_acc": 0.7218057637847742,
"train_speed(iter/s)": 0.252196
},
{
"epoch": 0.4550625711035267,
"grad_norm": 2.328125,
"learning_rate": 9.168011926105598e-06,
"loss": 1.111426544189453,
"memory(GiB)": 45.97,
"step": 25,
"token_acc": 0.7425333872925941,
"train_speed(iter/s)": 0.251882
},
{
"epoch": 0.5460750853242321,
"grad_norm": 2.578125,
"learning_rate": 8.695044586103297e-06,
"loss": 1.1071309089660644,
"memory(GiB)": 45.97,
"step": 30,
"token_acc": 0.7414093361083974,
"train_speed(iter/s)": 0.254082
},
{
"epoch": 0.6370875995449374,
"grad_norm": 2.078125,
"learning_rate": 8.134619029470535e-06,
"loss": 1.0020055770874023,
"memory(GiB)": 45.97,
"step": 35,
"token_acc": 0.7621102932675633,
"train_speed(iter/s)": 0.251894
},
{
"epoch": 0.7281001137656428,
"grad_norm": 2.359375,
"learning_rate": 7.500000000000001e-06,
"loss": 1.0203259468078614,
"memory(GiB)": 50.29,
"step": 40,
"token_acc": 0.7580885395117914,
"train_speed(iter/s)": 0.252412
},
{
"epoch": 0.8191126279863481,
"grad_norm": 2.21875,
"learning_rate": 6.806208330935766e-06,
"loss": 0.9908183097839356,
"memory(GiB)": 50.29,
"step": 45,
"token_acc": 0.7667093258473352,
"train_speed(iter/s)": 0.252266
},
{
"epoch": 0.9101251422070534,
"grad_norm": 2.5,
"learning_rate": 6.0696654160324875e-06,
"loss": 1.045759677886963,
"memory(GiB)": 50.29,
"step": 50,
"token_acc": 0.7470934799685781,
"train_speed(iter/s)": 0.251372
},
{
"epoch": 0.9101251422070534,
"eval_loss": 1.073840618133545,
"eval_runtime": 0.6301,
"eval_samples_per_second": 68.243,
"eval_steps_per_second": 14.283,
"eval_token_acc": 0.7637732857709076,
"step": 50
},
{
"epoch": 1.018202502844141,
"grad_norm": 3.59375,
"learning_rate": 5.3078045306697154e-06,
"loss": 1.223165225982666,
"memory(GiB)": 57.52,
"step": 55,
"token_acc": 0.7527071344595012,
"train_speed(iter/s)": 0.246897
},
{
"epoch": 1.1092150170648465,
"grad_norm": 2.328125,
"learning_rate": 4.53865820268349e-06,
"loss": 1.0227657318115235,
"memory(GiB)": 57.52,
"step": 60,
"token_acc": 0.755286734276229,
"train_speed(iter/s)": 0.248832
},
{
"epoch": 1.2002275312855517,
"grad_norm": 2.03125,
"learning_rate": 3.7804313994581143e-06,
"loss": 0.9702803611755371,
"memory(GiB)": 57.52,
"step": 65,
"token_acc": 0.7658987281017519,
"train_speed(iter/s)": 0.248641
},
{
"epoch": 1.2912400455062572,
"grad_norm": 2.3125,
"learning_rate": 3.0510706335366034e-06,
"loss": 1.0110454559326172,
"memory(GiB)": 57.52,
"step": 70,
"token_acc": 0.7643304928863696,
"train_speed(iter/s)": 0.248241
},
{
"epoch": 1.3822525597269624,
"grad_norm": 2.390625,
"learning_rate": 2.3678391856132203e-06,
"loss": 0.9339286804199218,
"memory(GiB)": 57.52,
"step": 75,
"token_acc": 0.7785570747468379,
"train_speed(iter/s)": 0.247963
},
{
"epoch": 1.4732650739476678,
"grad_norm": 1.953125,
"learning_rate": 1.746908498978791e-06,
"loss": 0.9071330070495606,
"memory(GiB)": 57.52,
"step": 80,
"token_acc": 0.7775242441528808,
"train_speed(iter/s)": 0.248416
},
{
"epoch": 1.5642775881683733,
"grad_norm": 2.203125,
"learning_rate": 1.202975416726464e-06,
"loss": 1.0261162757873534,
"memory(GiB)": 57.52,
"step": 85,
"token_acc": 0.7509206426287888,
"train_speed(iter/s)": 0.248819
},
{
"epoch": 1.6552901023890785,
"grad_norm": 2.078125,
"learning_rate": 7.489143213519301e-07,
"loss": 0.995113468170166,
"memory(GiB)": 57.52,
"step": 90,
"token_acc": 0.7631283572516636,
"train_speed(iter/s)": 0.24959
},
{
"epoch": 1.7463026166097837,
"grad_norm": 2.1875,
"learning_rate": 3.9547241027523164e-07,
"loss": 0.9445444107055664,
"memory(GiB)": 57.52,
"step": 95,
"token_acc": 0.7700910688608404,
"train_speed(iter/s)": 0.249979
},
{
"epoch": 1.8373151308304891,
"grad_norm": 2.03125,
"learning_rate": 1.510153198249531e-07,
"loss": 0.9724701881408692,
"memory(GiB)": 57.52,
"step": 100,
"token_acc": 0.7649354027573051,
"train_speed(iter/s)": 0.250388
},
{
"epoch": 1.8373151308304891,
"eval_loss": 1.0617759227752686,
"eval_runtime": 0.6342,
"eval_samples_per_second": 67.805,
"eval_steps_per_second": 14.192,
"eval_token_acc": 0.7669441141498217,
"step": 100
},
{
"epoch": 1.9283276450511946,
"grad_norm": 2.1875,
"learning_rate": 2.1329118524827662e-08,
"loss": 0.9729574203491211,
"memory(GiB)": 57.52,
"step": 105,
"token_acc": 0.7660818713450293,
"train_speed(iter/s)": 0.250386
},
{
"epoch": 1.9829351535836177,
"eval_loss": 1.061455249786377,
"eval_runtime": 0.6272,
"eval_samples_per_second": 68.563,
"eval_steps_per_second": 14.35,
"eval_token_acc": 0.7653586999603647,
"step": 108
}
],
"logging_steps": 5,
"max_steps": 108,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.19180283271168e+16,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}