初始化项目,由ModelHub XC社区提供模型
Model: waleko/Qwen3-8B-SFT-envbench_qwen-green-yellow Source: Original Platform
This commit is contained in:
304
result_model/trainer_state.json
Normal file
304
result_model/trainer_state.json
Normal file
@@ -0,0 +1,304 @@
|
||||
{
|
||||
"best_global_step": null,
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 5.0,
|
||||
"eval_steps": 25,
|
||||
"global_step": 25,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.21052631578947367,
|
||||
"grad_norm": 3.703707456588745,
|
||||
"learning_rate": 0.0,
|
||||
"loss": 0.2265,
|
||||
"num_input_tokens_seen": 90632,
|
||||
"step": 1,
|
||||
"train_runtime": 21.9544,
|
||||
"train_tokens_per_second": 4128.187
|
||||
},
|
||||
{
|
||||
"epoch": 0.42105263157894735,
|
||||
"grad_norm": 3.695063352584839,
|
||||
"learning_rate": 1.6666666666666667e-05,
|
||||
"loss": 0.2547,
|
||||
"num_input_tokens_seen": 183712,
|
||||
"step": 2,
|
||||
"train_runtime": 31.6354,
|
||||
"train_tokens_per_second": 5807.165
|
||||
},
|
||||
{
|
||||
"epoch": 0.631578947368421,
|
||||
"grad_norm": 3.6188080310821533,
|
||||
"learning_rate": 3.3333333333333335e-05,
|
||||
"loss": 0.2468,
|
||||
"num_input_tokens_seen": 277384,
|
||||
"step": 3,
|
||||
"train_runtime": 41.3805,
|
||||
"train_tokens_per_second": 6703.247
|
||||
},
|
||||
{
|
||||
"epoch": 0.8421052631578947,
|
||||
"grad_norm": 1.5909664630889893,
|
||||
"learning_rate": 5e-05,
|
||||
"loss": 0.238,
|
||||
"num_input_tokens_seen": 376672,
|
||||
"step": 4,
|
||||
"train_runtime": 51.4829,
|
||||
"train_tokens_per_second": 7316.455
|
||||
},
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"grad_norm": 1.5909664630889893,
|
||||
"learning_rate": 4.9745536047023324e-05,
|
||||
"loss": 0.2939,
|
||||
"num_input_tokens_seen": 448584,
|
||||
"step": 5,
|
||||
"train_runtime": 56.2763,
|
||||
"train_tokens_per_second": 7971.101
|
||||
},
|
||||
{
|
||||
"epoch": 1.2105263157894737,
|
||||
"grad_norm": 1.5388646125793457,
|
||||
"learning_rate": 4.898732434036244e-05,
|
||||
"loss": 0.3134,
|
||||
"num_input_tokens_seen": 541576,
|
||||
"step": 6,
|
||||
"train_runtime": 66.044,
|
||||
"train_tokens_per_second": 8200.224
|
||||
},
|
||||
{
|
||||
"epoch": 1.4210526315789473,
|
||||
"grad_norm": 1.0950462818145752,
|
||||
"learning_rate": 4.774079988386296e-05,
|
||||
"loss": 0.2642,
|
||||
"num_input_tokens_seen": 637744,
|
||||
"step": 7,
|
||||
"train_runtime": 75.8587,
|
||||
"train_tokens_per_second": 8407.001
|
||||
},
|
||||
{
|
||||
"epoch": 1.631578947368421,
|
||||
"grad_norm": 0.9882143139839172,
|
||||
"learning_rate": 4.6031338320779534e-05,
|
||||
"loss": 0.2666,
|
||||
"num_input_tokens_seen": 732120,
|
||||
"step": 8,
|
||||
"train_runtime": 86.0089,
|
||||
"train_tokens_per_second": 8512.144
|
||||
},
|
||||
{
|
||||
"epoch": 1.8421052631578947,
|
||||
"grad_norm": 0.7304351925849915,
|
||||
"learning_rate": 4.389373935885646e-05,
|
||||
"loss": 0.2346,
|
||||
"num_input_tokens_seen": 833528,
|
||||
"step": 9,
|
||||
"train_runtime": 96.2093,
|
||||
"train_tokens_per_second": 8663.694
|
||||
},
|
||||
{
|
||||
"epoch": 2.0,
|
||||
"grad_norm": 0.7826125025749207,
|
||||
"learning_rate": 4.137151834863213e-05,
|
||||
"loss": 0.251,
|
||||
"num_input_tokens_seen": 897168,
|
||||
"step": 10,
|
||||
"train_runtime": 104.3221,
|
||||
"train_tokens_per_second": 8599.982
|
||||
},
|
||||
{
|
||||
"epoch": 2.2105263157894735,
|
||||
"grad_norm": 0.8914588093757629,
|
||||
"learning_rate": 3.851602043638994e-05,
|
||||
"loss": 0.2168,
|
||||
"num_input_tokens_seen": 993464,
|
||||
"step": 11,
|
||||
"train_runtime": 113.7023,
|
||||
"train_tokens_per_second": 8737.415
|
||||
},
|
||||
{
|
||||
"epoch": 2.4210526315789473,
|
||||
"grad_norm": 0.7048306465148926,
|
||||
"learning_rate": 3.5385375325047166e-05,
|
||||
"loss": 0.1636,
|
||||
"num_input_tokens_seen": 1092872,
|
||||
"step": 12,
|
||||
"train_runtime": 123.4543,
|
||||
"train_tokens_per_second": 8852.44
|
||||
},
|
||||
{
|
||||
"epoch": 2.6315789473684212,
|
||||
"grad_norm": 0.5587737560272217,
|
||||
"learning_rate": 3.2043313921035743e-05,
|
||||
"loss": 0.1882,
|
||||
"num_input_tokens_seen": 1188144,
|
||||
"step": 13,
|
||||
"train_runtime": 133.3972,
|
||||
"train_tokens_per_second": 8906.815
|
||||
},
|
||||
{
|
||||
"epoch": 2.8421052631578947,
|
||||
"grad_norm": 0.43069741129875183,
|
||||
"learning_rate": 2.8557870956832132e-05,
|
||||
"loss": 0.161,
|
||||
"num_input_tokens_seen": 1287152,
|
||||
"step": 14,
|
||||
"train_runtime": 143.1833,
|
||||
"train_tokens_per_second": 8989.542
|
||||
},
|
||||
{
|
||||
"epoch": 3.0,
|
||||
"grad_norm": 0.9480004906654358,
|
||||
"learning_rate": 2.5e-05,
|
||||
"loss": 0.2328,
|
||||
"num_input_tokens_seen": 1345752,
|
||||
"step": 15,
|
||||
"train_runtime": 150.9211,
|
||||
"train_tokens_per_second": 8916.923
|
||||
},
|
||||
{
|
||||
"epoch": 3.2105263157894735,
|
||||
"grad_norm": 0.7457718253135681,
|
||||
"learning_rate": 2.1442129043167874e-05,
|
||||
"loss": 0.1513,
|
||||
"num_input_tokens_seen": 1438888,
|
||||
"step": 16,
|
||||
"train_runtime": 160.8789,
|
||||
"train_tokens_per_second": 8943.92
|
||||
},
|
||||
{
|
||||
"epoch": 3.4210526315789473,
|
||||
"grad_norm": 0.40165457129478455,
|
||||
"learning_rate": 1.795668607896426e-05,
|
||||
"loss": 0.1159,
|
||||
"num_input_tokens_seen": 1535160,
|
||||
"step": 17,
|
||||
"train_runtime": 170.5415,
|
||||
"train_tokens_per_second": 9001.68
|
||||
},
|
||||
{
|
||||
"epoch": 3.6315789473684212,
|
||||
"grad_norm": 0.3553405702114105,
|
||||
"learning_rate": 1.4614624674952842e-05,
|
||||
"loss": 0.1408,
|
||||
"num_input_tokens_seen": 1629952,
|
||||
"step": 18,
|
||||
"train_runtime": 180.3174,
|
||||
"train_tokens_per_second": 9039.349
|
||||
},
|
||||
{
|
||||
"epoch": 3.8421052631578947,
|
||||
"grad_norm": 0.4195708632469177,
|
||||
"learning_rate": 1.148397956361007e-05,
|
||||
"loss": 0.1207,
|
||||
"num_input_tokens_seen": 1729264,
|
||||
"step": 19,
|
||||
"train_runtime": 190.1508,
|
||||
"train_tokens_per_second": 9094.17
|
||||
},
|
||||
{
|
||||
"epoch": 4.0,
|
||||
"grad_norm": 0.6537638306617737,
|
||||
"learning_rate": 8.628481651367876e-06,
|
||||
"loss": 0.1767,
|
||||
"num_input_tokens_seen": 1794336,
|
||||
"step": 20,
|
||||
"train_runtime": 197.9448,
|
||||
"train_tokens_per_second": 9064.829
|
||||
},
|
||||
{
|
||||
"epoch": 4.2105263157894735,
|
||||
"grad_norm": 0.2990877628326416,
|
||||
"learning_rate": 6.106260641143546e-06,
|
||||
"loss": 0.0917,
|
||||
"num_input_tokens_seen": 1893760,
|
||||
"step": 21,
|
||||
"train_runtime": 207.6689,
|
||||
"train_tokens_per_second": 9119.132
|
||||
},
|
||||
{
|
||||
"epoch": 4.421052631578947,
|
||||
"grad_norm": 0.40062811970710754,
|
||||
"learning_rate": 3.968661679220468e-06,
|
||||
"loss": 0.1139,
|
||||
"num_input_tokens_seen": 1981256,
|
||||
"step": 22,
|
||||
"train_runtime": 216.876,
|
||||
"train_tokens_per_second": 9135.434
|
||||
},
|
||||
{
|
||||
"epoch": 4.631578947368421,
|
||||
"grad_norm": 0.31818389892578125,
|
||||
"learning_rate": 2.2592001161370392e-06,
|
||||
"loss": 0.1306,
|
||||
"num_input_tokens_seen": 2077800,
|
||||
"step": 23,
|
||||
"train_runtime": 226.6964,
|
||||
"train_tokens_per_second": 9165.564
|
||||
},
|
||||
{
|
||||
"epoch": 4.842105263157895,
|
||||
"grad_norm": 0.39665085077285767,
|
||||
"learning_rate": 1.0126756596375686e-06,
|
||||
"loss": 0.133,
|
||||
"num_input_tokens_seen": 2170328,
|
||||
"step": 24,
|
||||
"train_runtime": 236.3103,
|
||||
"train_tokens_per_second": 9184.228
|
||||
},
|
||||
{
|
||||
"epoch": 5.0,
|
||||
"grad_norm": 0.39665085077285767,
|
||||
"learning_rate": 2.544639529766829e-07,
|
||||
"loss": 0.1053,
|
||||
"num_input_tokens_seen": 2242920,
|
||||
"step": 25,
|
||||
"train_runtime": 240.7089,
|
||||
"train_tokens_per_second": 9317.977
|
||||
},
|
||||
{
|
||||
"epoch": 5.0,
|
||||
"eval_accuracy": 0.9471856009625939,
|
||||
"eval_loss": 0.16564106941223145,
|
||||
"eval_runtime": 0.5896,
|
||||
"eval_samples_per_second": 8.48,
|
||||
"eval_steps_per_second": 3.392,
|
||||
"num_input_tokens_seen": 2242920,
|
||||
"step": 25
|
||||
},
|
||||
{
|
||||
"epoch": 5.0,
|
||||
"num_input_tokens_seen": 2242920,
|
||||
"step": 25,
|
||||
"total_flos": 1.0185196765918003e+17,
|
||||
"train_loss": 0.19328440070152283,
|
||||
"train_runtime": 354.8577,
|
||||
"train_samples_per_second": 1.071,
|
||||
"train_steps_per_second": 0.07
|
||||
}
|
||||
],
|
||||
"logging_steps": 1,
|
||||
"max_steps": 25,
|
||||
"num_input_tokens_seen": 2242920,
|
||||
"num_train_epochs": 5,
|
||||
"save_steps": 1000,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 1.0185196765918003e+17,
|
||||
"train_batch_size": 1,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
Reference in New Issue
Block a user