{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 118, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.6852783646434546, "epoch": 0.08537886872998933, "grad_norm": 0.539056658744812, "learning_rate": 9.56140350877193e-06, "loss": 1.5916325569152832, "mean_token_accuracy": 0.6394169898703694, "num_tokens": 1036987.0, "step": 10 }, { "entropy": 1.3057927396148443, "epoch": 0.17075773745997866, "grad_norm": 0.09388110786676407, "learning_rate": 8.68421052631579e-06, "loss": 1.1732550621032716, "mean_token_accuracy": 0.6894607817754149, "num_tokens": 2099721.0, "step": 20 }, { "entropy": 1.24310187920928, "epoch": 0.256136606189968, "grad_norm": 0.08361112326383591, "learning_rate": 7.80701754385965e-06, "loss": 1.083221435546875, "mean_token_accuracy": 0.7096419665962458, "num_tokens": 3148066.0, "step": 30 }, { "entropy": 1.2204767568036914, "epoch": 0.3415154749199573, "grad_norm": 0.06735046952962875, "learning_rate": 6.92982456140351e-06, "loss": 1.0362739562988281, "mean_token_accuracy": 0.7181693298742176, "num_tokens": 4198507.0, "step": 40 }, { "entropy": 1.1896080307662487, "epoch": 0.42689434364994666, "grad_norm": 0.05402417853474617, "learning_rate": 6.0526315789473685e-06, "loss": 1.0045047760009767, "mean_token_accuracy": 0.7199778087437153, "num_tokens": 5240508.0, "step": 50 }, { "entropy": 1.136293525248766, "epoch": 0.512273212379936, "grad_norm": 0.04568689689040184, "learning_rate": 5.175438596491229e-06, "loss": 0.9714550018310547, "mean_token_accuracy": 0.7258936163038016, "num_tokens": 6301215.0, "step": 60 }, { "entropy": 1.110643889568746, "epoch": 0.5976520811099253, "grad_norm": 0.04777698218822479, "learning_rate": 4.298245614035088e-06, "loss": 0.9732734680175781, "mean_token_accuracy": 0.7243976121768355, "num_tokens": 7347766.0, "step": 70 }, { "entropy": 1.0825907880440355, "epoch": 0.6830309498399146, "grad_norm": 0.04266300052404404, "learning_rate": 3.421052631578948e-06, "loss": 0.9640823364257812, "mean_token_accuracy": 0.7246530564501882, "num_tokens": 8414578.0, "step": 80 }, { "entropy": 1.0757255567237736, "epoch": 0.768409818569904, "grad_norm": 0.06162378564476967, "learning_rate": 2.5438596491228075e-06, "loss": 0.9552610397338868, "mean_token_accuracy": 0.7269493261352181, "num_tokens": 9448833.0, "step": 90 }, { "entropy": 1.07811812851578, "epoch": 0.8537886872998933, "grad_norm": 0.044764406979084015, "learning_rate": 1.6666666666666667e-06, "loss": 0.9457586288452149, "mean_token_accuracy": 0.7318377941846848, "num_tokens": 10496356.0, "step": 100 }, { "entropy": 1.0599956944584847, "epoch": 0.9391675560298826, "grad_norm": 0.047913916409015656, "learning_rate": 7.894736842105263e-07, "loss": 0.9395035743713379, "mean_token_accuracy": 0.7343964511528611, "num_tokens": 11547813.0, "step": 110 } ], "logging_steps": 10, "max_steps": 118, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8750638152640102e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }