Files
Llama-3.1-8B-precise_if/checkpoint-118/trainer_state.json
ModelHub XC 8201d7ffe6 初始化项目,由ModelHub XC社区提供模型
Model: mremila/Llama-3.1-8B-precise_if
Source: Original Platform
2026-04-25 15:26:03 +08:00

145 lines
4.0 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 118,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 1.6852783646434546,
"epoch": 0.08537886872998933,
"grad_norm": 0.539056658744812,
"learning_rate": 9.56140350877193e-06,
"loss": 1.5916325569152832,
"mean_token_accuracy": 0.6394169898703694,
"num_tokens": 1036987.0,
"step": 10
},
{
"entropy": 1.3057927396148443,
"epoch": 0.17075773745997866,
"grad_norm": 0.09388110786676407,
"learning_rate": 8.68421052631579e-06,
"loss": 1.1732550621032716,
"mean_token_accuracy": 0.6894607817754149,
"num_tokens": 2099721.0,
"step": 20
},
{
"entropy": 1.24310187920928,
"epoch": 0.256136606189968,
"grad_norm": 0.08361112326383591,
"learning_rate": 7.80701754385965e-06,
"loss": 1.083221435546875,
"mean_token_accuracy": 0.7096419665962458,
"num_tokens": 3148066.0,
"step": 30
},
{
"entropy": 1.2204767568036914,
"epoch": 0.3415154749199573,
"grad_norm": 0.06735046952962875,
"learning_rate": 6.92982456140351e-06,
"loss": 1.0362739562988281,
"mean_token_accuracy": 0.7181693298742176,
"num_tokens": 4198507.0,
"step": 40
},
{
"entropy": 1.1896080307662487,
"epoch": 0.42689434364994666,
"grad_norm": 0.05402417853474617,
"learning_rate": 6.0526315789473685e-06,
"loss": 1.0045047760009767,
"mean_token_accuracy": 0.7199778087437153,
"num_tokens": 5240508.0,
"step": 50
},
{
"entropy": 1.136293525248766,
"epoch": 0.512273212379936,
"grad_norm": 0.04568689689040184,
"learning_rate": 5.175438596491229e-06,
"loss": 0.9714550018310547,
"mean_token_accuracy": 0.7258936163038016,
"num_tokens": 6301215.0,
"step": 60
},
{
"entropy": 1.110643889568746,
"epoch": 0.5976520811099253,
"grad_norm": 0.04777698218822479,
"learning_rate": 4.298245614035088e-06,
"loss": 0.9732734680175781,
"mean_token_accuracy": 0.7243976121768355,
"num_tokens": 7347766.0,
"step": 70
},
{
"entropy": 1.0825907880440355,
"epoch": 0.6830309498399146,
"grad_norm": 0.04266300052404404,
"learning_rate": 3.421052631578948e-06,
"loss": 0.9640823364257812,
"mean_token_accuracy": 0.7246530564501882,
"num_tokens": 8414578.0,
"step": 80
},
{
"entropy": 1.0757255567237736,
"epoch": 0.768409818569904,
"grad_norm": 0.06162378564476967,
"learning_rate": 2.5438596491228075e-06,
"loss": 0.9552610397338868,
"mean_token_accuracy": 0.7269493261352181,
"num_tokens": 9448833.0,
"step": 90
},
{
"entropy": 1.07811812851578,
"epoch": 0.8537886872998933,
"grad_norm": 0.044764406979084015,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.9457586288452149,
"mean_token_accuracy": 0.7318377941846848,
"num_tokens": 10496356.0,
"step": 100
},
{
"entropy": 1.0599956944584847,
"epoch": 0.9391675560298826,
"grad_norm": 0.047913916409015656,
"learning_rate": 7.894736842105263e-07,
"loss": 0.9395035743713379,
"mean_token_accuracy": 0.7343964511528611,
"num_tokens": 11547813.0,
"step": 110
}
],
"logging_steps": 10,
"max_steps": 118,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8750638152640102e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}