145 lines
4.0 KiB
JSON
145 lines
4.0 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 118,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"entropy": 1.6852783646434546,
|
|
"epoch": 0.08537886872998933,
|
|
"grad_norm": 0.539056658744812,
|
|
"learning_rate": 9.56140350877193e-06,
|
|
"loss": 1.5916325569152832,
|
|
"mean_token_accuracy": 0.6394169898703694,
|
|
"num_tokens": 1036987.0,
|
|
"step": 10
|
|
},
|
|
{
|
|
"entropy": 1.3057927396148443,
|
|
"epoch": 0.17075773745997866,
|
|
"grad_norm": 0.09388110786676407,
|
|
"learning_rate": 8.68421052631579e-06,
|
|
"loss": 1.1732550621032716,
|
|
"mean_token_accuracy": 0.6894607817754149,
|
|
"num_tokens": 2099721.0,
|
|
"step": 20
|
|
},
|
|
{
|
|
"entropy": 1.24310187920928,
|
|
"epoch": 0.256136606189968,
|
|
"grad_norm": 0.08361112326383591,
|
|
"learning_rate": 7.80701754385965e-06,
|
|
"loss": 1.083221435546875,
|
|
"mean_token_accuracy": 0.7096419665962458,
|
|
"num_tokens": 3148066.0,
|
|
"step": 30
|
|
},
|
|
{
|
|
"entropy": 1.2204767568036914,
|
|
"epoch": 0.3415154749199573,
|
|
"grad_norm": 0.06735046952962875,
|
|
"learning_rate": 6.92982456140351e-06,
|
|
"loss": 1.0362739562988281,
|
|
"mean_token_accuracy": 0.7181693298742176,
|
|
"num_tokens": 4198507.0,
|
|
"step": 40
|
|
},
|
|
{
|
|
"entropy": 1.1896080307662487,
|
|
"epoch": 0.42689434364994666,
|
|
"grad_norm": 0.05402417853474617,
|
|
"learning_rate": 6.0526315789473685e-06,
|
|
"loss": 1.0045047760009767,
|
|
"mean_token_accuracy": 0.7199778087437153,
|
|
"num_tokens": 5240508.0,
|
|
"step": 50
|
|
},
|
|
{
|
|
"entropy": 1.136293525248766,
|
|
"epoch": 0.512273212379936,
|
|
"grad_norm": 0.04568689689040184,
|
|
"learning_rate": 5.175438596491229e-06,
|
|
"loss": 0.9714550018310547,
|
|
"mean_token_accuracy": 0.7258936163038016,
|
|
"num_tokens": 6301215.0,
|
|
"step": 60
|
|
},
|
|
{
|
|
"entropy": 1.110643889568746,
|
|
"epoch": 0.5976520811099253,
|
|
"grad_norm": 0.04777698218822479,
|
|
"learning_rate": 4.298245614035088e-06,
|
|
"loss": 0.9732734680175781,
|
|
"mean_token_accuracy": 0.7243976121768355,
|
|
"num_tokens": 7347766.0,
|
|
"step": 70
|
|
},
|
|
{
|
|
"entropy": 1.0825907880440355,
|
|
"epoch": 0.6830309498399146,
|
|
"grad_norm": 0.04266300052404404,
|
|
"learning_rate": 3.421052631578948e-06,
|
|
"loss": 0.9640823364257812,
|
|
"mean_token_accuracy": 0.7246530564501882,
|
|
"num_tokens": 8414578.0,
|
|
"step": 80
|
|
},
|
|
{
|
|
"entropy": 1.0757255567237736,
|
|
"epoch": 0.768409818569904,
|
|
"grad_norm": 0.06162378564476967,
|
|
"learning_rate": 2.5438596491228075e-06,
|
|
"loss": 0.9552610397338868,
|
|
"mean_token_accuracy": 0.7269493261352181,
|
|
"num_tokens": 9448833.0,
|
|
"step": 90
|
|
},
|
|
{
|
|
"entropy": 1.07811812851578,
|
|
"epoch": 0.8537886872998933,
|
|
"grad_norm": 0.044764406979084015,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 0.9457586288452149,
|
|
"mean_token_accuracy": 0.7318377941846848,
|
|
"num_tokens": 10496356.0,
|
|
"step": 100
|
|
},
|
|
{
|
|
"entropy": 1.0599956944584847,
|
|
"epoch": 0.9391675560298826,
|
|
"grad_norm": 0.047913916409015656,
|
|
"learning_rate": 7.894736842105263e-07,
|
|
"loss": 0.9395035743713379,
|
|
"mean_token_accuracy": 0.7343964511528611,
|
|
"num_tokens": 11547813.0,
|
|
"step": 110
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 118,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.8750638152640102e+17,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|