{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.034904013961605584, "grad_norm": 0.2724517285823822, "learning_rate": 5.333333333333334e-06, "loss": 0.0934, "step": 5 }, { "epoch": 0.06980802792321117, "grad_norm": 0.3414818048477173, "learning_rate": 1.2e-05, "loss": 0.0773, "step": 10 }, { "epoch": 0.10471204188481675, "grad_norm": 0.07792749255895615, "learning_rate": 1.866666666666667e-05, "loss": 0.0711, "step": 15 }, { "epoch": 0.13961605584642234, "grad_norm": 0.0294520054012537, "learning_rate": 1.995259033893236e-05, "loss": 0.0736, "step": 20 }, { "epoch": 0.17452006980802792, "grad_norm": 0.013957683928310871, "learning_rate": 1.9760758775559275e-05, "loss": 0.0697, "step": 25 }, { "epoch": 0.2094240837696335, "grad_norm": 0.065118707716465, "learning_rate": 1.9424380828337146e-05, "loss": 0.0699, "step": 30 }, { "epoch": 0.2443280977312391, "grad_norm": 0.021100476384162903, "learning_rate": 1.894843789440892e-05, "loss": 0.0697, "step": 35 }, { "epoch": 0.2792321116928447, "grad_norm": 0.026198429986834526, "learning_rate": 1.833997817889878e-05, "loss": 0.0695, "step": 40 }, { "epoch": 0.31413612565445026, "grad_norm": 0.07283973693847656, "learning_rate": 1.760801231854278e-05, "loss": 0.07, "step": 45 }, { "epoch": 0.34904013961605584, "grad_norm": 0.04578598588705063, "learning_rate": 1.676337994380903e-05, "loss": 0.0701, "step": 50 }, { "epoch": 0.38394415357766143, "grad_norm": 0.10095158964395523, "learning_rate": 1.581858915557953e-05, "loss": 0.0698, "step": 55 }, { "epoch": 0.418848167539267, "grad_norm": 0.028562646359205246, "learning_rate": 1.4787631293572094e-05, "loss": 0.0699, "step": 60 }, { "epoch": 0.4537521815008726, "grad_norm": 0.02697976492345333, "learning_rate": 1.368577373958362e-05, "loss": 0.0695, "step": 65 }, { "epoch": 0.4886561954624782, "grad_norm": 0.0685800239443779, "learning_rate": 1.2529333823916807e-05, "loss": 0.0696, "step": 70 }, { "epoch": 0.5235602094240838, "grad_norm": 0.13133621215820312, "learning_rate": 1.133543718319398e-05, "loss": 0.0713, "step": 75 }, { "epoch": 0.5584642233856894, "grad_norm": 0.017290577292442322, "learning_rate": 1.0121764148019977e-05, "loss": 0.0696, "step": 80 }, { "epoch": 0.5933682373472949, "grad_norm": 0.05858515202999115, "learning_rate": 8.906287916221259e-06, "loss": 0.0696, "step": 85 }, { "epoch": 0.6282722513089005, "grad_norm": 0.07648473978042603, "learning_rate": 7.707008389035102e-06, "loss": 0.0699, "step": 90 }, { "epoch": 0.6631762652705061, "grad_norm": 0.052451424300670624, "learning_rate": 6.5416856118498874e-06, "loss": 0.0697, "step": 95 }, { "epoch": 0.6980802792321117, "grad_norm": 0.03691520541906357, "learning_rate": 5.427576766953615e-06, "loss": 0.0697, "step": 100 }, { "epoch": 0.7329842931937173, "grad_norm": 0.003152969991788268, "learning_rate": 4.381180613146396e-06, "loss": 0.0695, "step": 105 }, { "epoch": 0.7678883071553229, "grad_norm": 0.017924955114722252, "learning_rate": 3.4179931567925216e-06, "loss": 0.0694, "step": 110 }, { "epoch": 0.8027923211169284, "grad_norm": 0.04167533293366432, "learning_rate": 2.5522781725621814e-06, "loss": 0.0694, "step": 115 }, { "epoch": 0.837696335078534, "grad_norm": 0.03422262519598007, "learning_rate": 1.7968559722048906e-06, "loss": 0.0692, "step": 120 }, { "epoch": 0.8726003490401396, "grad_norm": 0.0365980863571167, "learning_rate": 1.1629135494628097e-06, "loss": 0.0696, "step": 125 }, { "epoch": 0.9075043630017452, "grad_norm": 0.032294586300849915, "learning_rate": 6.598389126745209e-07, "loss": 0.0695, "step": 130 }, { "epoch": 0.9424083769633508, "grad_norm": 0.001334571628831327, "learning_rate": 2.9508205842594727e-07, "loss": 0.0695, "step": 135 }, { "epoch": 0.9773123909249564, "grad_norm": 0.05335932970046997, "learning_rate": 7.404464507973608e-08, "loss": 0.0693, "step": 140 }, { "epoch": 1.0, "step": 144, "total_flos": 2.4545020729727386e+17, "train_loss": 0.07097241137590674, "train_runtime": 1113.1898, "train_samples_per_second": 16.457, "train_steps_per_second": 0.129 } ], "logging_steps": 5, "max_steps": 144, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4545020729727386e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }