{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.023986039375519717, "eval_steps": 99999999, "global_step": 128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001873909326212478, "grad_norm": 245.6398468017578, "learning_rate": 5e-05, "loss": 175.7636, "step": 1 }, { "epoch": 0.0001873909326212478, "eval_accuracy": 0.015452299171252725, "eval_loss": 10.732580184936523, "eval_runtime": 219.7551, "eval_samples_per_second": 35.922, "eval_steps_per_second": 4.491, "step": 1 }, { "epoch": 0.0003747818652424956, "grad_norm": 113.21331024169922, "learning_rate": 4.9990631440884397e-05, "loss": 161.0339, "step": 2 }, { "epoch": 0.0003747818652424956, "eval_accuracy": 0.015452299171252725, "eval_loss": 10.66187572479248, "eval_runtime": 137.4523, "eval_samples_per_second": 57.431, "eval_steps_per_second": 7.181, "step": 2 }, { "epoch": 0.0007495637304849911, "grad_norm": 65.50191497802734, "learning_rate": 4.997189432265318e-05, "loss": 156.102, "step": 4 }, { "epoch": 0.0007495637304849911, "eval_accuracy": 0.015464063058397669, "eval_loss": 10.59908676147461, "eval_runtime": 188.7788, "eval_samples_per_second": 41.816, "eval_steps_per_second": 5.228, "step": 4 }, { "epoch": 0.0014991274609699823, "grad_norm": 38.306678771972656, "learning_rate": 4.993442008619075e-05, "loss": 152.2921, "step": 8 }, { "epoch": 0.0014991274609699823, "eval_accuracy": 0.015459481333930691, "eval_loss": 10.50749397277832, "eval_runtime": 140.9589, "eval_samples_per_second": 56.002, "eval_steps_per_second": 7.002, "step": 8 }, { "epoch": 0.0029982549219399646, "grad_norm": 33.0531005859375, "learning_rate": 4.985947161326589e-05, "loss": 148.7237, "step": 16 }, { "epoch": 0.0029982549219399646, "eval_accuracy": 0.018715353804478252, "eval_loss": 10.448219299316406, "eval_runtime": 215.51, "eval_samples_per_second": 36.629, "eval_steps_per_second": 4.58, "step": 16 }, { "epoch": 0.005996509843879929, "grad_norm": 33.14303207397461, "learning_rate": 4.970957466741615e-05, "loss": 141.2197, "step": 32 }, { "epoch": 0.005996509843879929, "eval_accuracy": 0.02386372614066984, "eval_loss": 10.312174797058105, "eval_runtime": 201.4605, "eval_samples_per_second": 39.184, "eval_steps_per_second": 4.899, "step": 32 }, { "epoch": 0.011993019687759858, "grad_norm": 17.63327407836914, "learning_rate": 4.94097807757167e-05, "loss": 128.0666, "step": 64 }, { "epoch": 0.011993019687759858, "eval_accuracy": 0.0342046782626398, "eval_loss": 9.543290138244629, "eval_runtime": 210.7504, "eval_samples_per_second": 37.457, "eval_steps_per_second": 4.683, "step": 64 }, { "epoch": 0.023986039375519717, "grad_norm": 22.085046768188477, "learning_rate": 4.881019299231778e-05, "loss": 118.9934, "step": 128 }, { "epoch": 0.023986039375519717, "eval_accuracy": 0.03304265387350131, "eval_loss": 7.98660945892334, "eval_runtime": 227.9954, "eval_samples_per_second": 34.624, "eval_steps_per_second": 4.329, "step": 128 } ], "logging_steps": 99999999, "max_steps": 5337, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 99999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8562017304576000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }