{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 0.7260835453867912, "epoch": 0.37735849056603776, "grad_norm": 0.9797868132591248, "learning_rate": 0.00018, "loss": 1.0297086715698243, "mean_token_accuracy": 0.8047336474061012, "num_tokens": 41605.0, "step": 10 }, { "entropy": 0.6147415287792682, "epoch": 0.7547169811320755, "grad_norm": 0.8319393992424011, "learning_rate": 0.00019217488001088784, "loss": 0.610823392868042, "mean_token_accuracy": 0.8518229335546493, "num_tokens": 86144.0, "step": 20 }, { "entropy": 0.5127407720214442, "epoch": 1.1132075471698113, "grad_norm": 0.898364782333374, "learning_rate": 0.0001666935530836651, "loss": 0.47104392051696775, "mean_token_accuracy": 0.8734802854688544, "num_tokens": 125093.0, "step": 30 }, { "entropy": 0.35390120558440685, "epoch": 1.490566037735849, "grad_norm": 0.6483786702156067, "learning_rate": 0.0001283661778334297, "loss": 0.34734306335449217, "mean_token_accuracy": 0.9023109719157218, "num_tokens": 167344.0, "step": 40 }, { "entropy": 0.2861595153808594, "epoch": 1.8679245283018868, "grad_norm": 0.6667284965515137, "learning_rate": 8.457510670346976e-05, "loss": 0.27520730495452883, "mean_token_accuracy": 0.9225298032164574, "num_tokens": 209654.0, "step": 50 }, { "entropy": 0.21439368748351148, "epoch": 2.2264150943396226, "grad_norm": 0.9153143763542175, "learning_rate": 4.375507123592194e-05, "loss": 0.19489681720733643, "mean_token_accuracy": 0.9477020047212902, "num_tokens": 250416.0, "step": 60 }, { "entropy": 0.1475609978660941, "epoch": 2.6037735849056602, "grad_norm": 0.5222585201263428, "learning_rate": 1.3768542747997215e-05, "loss": 0.13845933675765992, "mean_token_accuracy": 0.9617140784859657, "num_tokens": 293209.0, "step": 70 }, { "entropy": 0.14689538963139057, "epoch": 2.981132075471698, "grad_norm": 0.6572704315185547, "learning_rate": 3.913177925055189e-07, "loss": 0.13648871183395386, "mean_token_accuracy": 0.9615253210067749, "num_tokens": 335896.0, "step": 80 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0410264608835584e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }