{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 54, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.7260835453867912, "epoch": 0.37735849056603776, "grad_norm": 0.9797868132591248, "learning_rate": 0.00018, "loss": 1.0297086715698243, "mean_token_accuracy": 0.8047336474061012, "num_tokens": 41605.0, "step": 10 }, { "entropy": 0.6147415287792682, "epoch": 0.7547169811320755, "grad_norm": 0.8319393992424011, "learning_rate": 0.00019217488001088784, "loss": 0.610823392868042, "mean_token_accuracy": 0.8518229335546493, "num_tokens": 86144.0, "step": 20 }, { "entropy": 0.5127407720214442, "epoch": 1.1132075471698113, "grad_norm": 0.898364782333374, "learning_rate": 0.0001666935530836651, "loss": 0.47104392051696775, "mean_token_accuracy": 0.8734802854688544, "num_tokens": 125093.0, "step": 30 }, { "entropy": 0.35390120558440685, "epoch": 1.490566037735849, "grad_norm": 0.6483786702156067, "learning_rate": 0.0001283661778334297, "loss": 0.34734306335449217, "mean_token_accuracy": 0.9023109719157218, "num_tokens": 167344.0, "step": 40 }, { "entropy": 0.2861595153808594, "epoch": 1.8679245283018868, "grad_norm": 0.6667284965515137, "learning_rate": 8.457510670346976e-05, "loss": 0.27520730495452883, "mean_token_accuracy": 0.9225298032164574, "num_tokens": 209654.0, "step": 50 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3533277150804992e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }