初始化项目,由ModelHub XC社区提供模型

Model: waleko/Qwen3-8B-SFT-envbench_qwen-green-yellow
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-11 10:01:57 +08:00
commit 015f1b7b6e
48 changed files with 305370 additions and 0 deletions

304
trainer_state.json Normal file
View File

@@ -0,0 +1,304 @@
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 25,
"global_step": 25,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21052631578947367,
"grad_norm": 3.703707456588745,
"learning_rate": 0.0,
"loss": 0.2265,
"num_input_tokens_seen": 90632,
"step": 1,
"train_runtime": 21.9544,
"train_tokens_per_second": 4128.187
},
{
"epoch": 0.42105263157894735,
"grad_norm": 3.695063352584839,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.2547,
"num_input_tokens_seen": 183712,
"step": 2,
"train_runtime": 31.6354,
"train_tokens_per_second": 5807.165
},
{
"epoch": 0.631578947368421,
"grad_norm": 3.6188080310821533,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.2468,
"num_input_tokens_seen": 277384,
"step": 3,
"train_runtime": 41.3805,
"train_tokens_per_second": 6703.247
},
{
"epoch": 0.8421052631578947,
"grad_norm": 1.5909664630889893,
"learning_rate": 5e-05,
"loss": 0.238,
"num_input_tokens_seen": 376672,
"step": 4,
"train_runtime": 51.4829,
"train_tokens_per_second": 7316.455
},
{
"epoch": 1.0,
"grad_norm": 1.5909664630889893,
"learning_rate": 4.9745536047023324e-05,
"loss": 0.2939,
"num_input_tokens_seen": 448584,
"step": 5,
"train_runtime": 56.2763,
"train_tokens_per_second": 7971.101
},
{
"epoch": 1.2105263157894737,
"grad_norm": 1.5388646125793457,
"learning_rate": 4.898732434036244e-05,
"loss": 0.3134,
"num_input_tokens_seen": 541576,
"step": 6,
"train_runtime": 66.044,
"train_tokens_per_second": 8200.224
},
{
"epoch": 1.4210526315789473,
"grad_norm": 1.0950462818145752,
"learning_rate": 4.774079988386296e-05,
"loss": 0.2642,
"num_input_tokens_seen": 637744,
"step": 7,
"train_runtime": 75.8587,
"train_tokens_per_second": 8407.001
},
{
"epoch": 1.631578947368421,
"grad_norm": 0.9882143139839172,
"learning_rate": 4.6031338320779534e-05,
"loss": 0.2666,
"num_input_tokens_seen": 732120,
"step": 8,
"train_runtime": 86.0089,
"train_tokens_per_second": 8512.144
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.7304351925849915,
"learning_rate": 4.389373935885646e-05,
"loss": 0.2346,
"num_input_tokens_seen": 833528,
"step": 9,
"train_runtime": 96.2093,
"train_tokens_per_second": 8663.694
},
{
"epoch": 2.0,
"grad_norm": 0.7826125025749207,
"learning_rate": 4.137151834863213e-05,
"loss": 0.251,
"num_input_tokens_seen": 897168,
"step": 10,
"train_runtime": 104.3221,
"train_tokens_per_second": 8599.982
},
{
"epoch": 2.2105263157894735,
"grad_norm": 0.8914588093757629,
"learning_rate": 3.851602043638994e-05,
"loss": 0.2168,
"num_input_tokens_seen": 993464,
"step": 11,
"train_runtime": 113.7023,
"train_tokens_per_second": 8737.415
},
{
"epoch": 2.4210526315789473,
"grad_norm": 0.7048306465148926,
"learning_rate": 3.5385375325047166e-05,
"loss": 0.1636,
"num_input_tokens_seen": 1092872,
"step": 12,
"train_runtime": 123.4543,
"train_tokens_per_second": 8852.44
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.5587737560272217,
"learning_rate": 3.2043313921035743e-05,
"loss": 0.1882,
"num_input_tokens_seen": 1188144,
"step": 13,
"train_runtime": 133.3972,
"train_tokens_per_second": 8906.815
},
{
"epoch": 2.8421052631578947,
"grad_norm": 0.43069741129875183,
"learning_rate": 2.8557870956832132e-05,
"loss": 0.161,
"num_input_tokens_seen": 1287152,
"step": 14,
"train_runtime": 143.1833,
"train_tokens_per_second": 8989.542
},
{
"epoch": 3.0,
"grad_norm": 0.9480004906654358,
"learning_rate": 2.5e-05,
"loss": 0.2328,
"num_input_tokens_seen": 1345752,
"step": 15,
"train_runtime": 150.9211,
"train_tokens_per_second": 8916.923
},
{
"epoch": 3.2105263157894735,
"grad_norm": 0.7457718253135681,
"learning_rate": 2.1442129043167874e-05,
"loss": 0.1513,
"num_input_tokens_seen": 1438888,
"step": 16,
"train_runtime": 160.8789,
"train_tokens_per_second": 8943.92
},
{
"epoch": 3.4210526315789473,
"grad_norm": 0.40165457129478455,
"learning_rate": 1.795668607896426e-05,
"loss": 0.1159,
"num_input_tokens_seen": 1535160,
"step": 17,
"train_runtime": 170.5415,
"train_tokens_per_second": 9001.68
},
{
"epoch": 3.6315789473684212,
"grad_norm": 0.3553405702114105,
"learning_rate": 1.4614624674952842e-05,
"loss": 0.1408,
"num_input_tokens_seen": 1629952,
"step": 18,
"train_runtime": 180.3174,
"train_tokens_per_second": 9039.349
},
{
"epoch": 3.8421052631578947,
"grad_norm": 0.4195708632469177,
"learning_rate": 1.148397956361007e-05,
"loss": 0.1207,
"num_input_tokens_seen": 1729264,
"step": 19,
"train_runtime": 190.1508,
"train_tokens_per_second": 9094.17
},
{
"epoch": 4.0,
"grad_norm": 0.6537638306617737,
"learning_rate": 8.628481651367876e-06,
"loss": 0.1767,
"num_input_tokens_seen": 1794336,
"step": 20,
"train_runtime": 197.9448,
"train_tokens_per_second": 9064.829
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.2990877628326416,
"learning_rate": 6.106260641143546e-06,
"loss": 0.0917,
"num_input_tokens_seen": 1893760,
"step": 21,
"train_runtime": 207.6689,
"train_tokens_per_second": 9119.132
},
{
"epoch": 4.421052631578947,
"grad_norm": 0.40062811970710754,
"learning_rate": 3.968661679220468e-06,
"loss": 0.1139,
"num_input_tokens_seen": 1981256,
"step": 22,
"train_runtime": 216.876,
"train_tokens_per_second": 9135.434
},
{
"epoch": 4.631578947368421,
"grad_norm": 0.31818389892578125,
"learning_rate": 2.2592001161370392e-06,
"loss": 0.1306,
"num_input_tokens_seen": 2077800,
"step": 23,
"train_runtime": 226.6964,
"train_tokens_per_second": 9165.564
},
{
"epoch": 4.842105263157895,
"grad_norm": 0.39665085077285767,
"learning_rate": 1.0126756596375686e-06,
"loss": 0.133,
"num_input_tokens_seen": 2170328,
"step": 24,
"train_runtime": 236.3103,
"train_tokens_per_second": 9184.228
},
{
"epoch": 5.0,
"grad_norm": 0.39665085077285767,
"learning_rate": 2.544639529766829e-07,
"loss": 0.1053,
"num_input_tokens_seen": 2242920,
"step": 25,
"train_runtime": 240.7089,
"train_tokens_per_second": 9317.977
},
{
"epoch": 5.0,
"eval_accuracy": 0.9471856009625939,
"eval_loss": 0.16564106941223145,
"eval_runtime": 0.5896,
"eval_samples_per_second": 8.48,
"eval_steps_per_second": 3.392,
"num_input_tokens_seen": 2242920,
"step": 25
},
{
"epoch": 5.0,
"num_input_tokens_seen": 2242920,
"step": 25,
"total_flos": 1.0185196765918003e+17,
"train_loss": 0.19328440070152283,
"train_runtime": 354.8577,
"train_samples_per_second": 1.071,
"train_steps_per_second": 0.07
}
],
"logging_steps": 1,
"max_steps": 25,
"num_input_tokens_seen": 2242920,
"num_train_epochs": 5,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0185196765918003e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}