{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04884004884004884, "grad_norm": 2.951613426208496, "learning_rate": 6.545454545454546e-06, "loss": 1.6103919982910155, "step": 10 }, { "epoch": 0.09768009768009768, "grad_norm": 1.4161419868469238, "learning_rate": 7.9664804049057e-06, "loss": 1.1710912704467773, "step": 20 }, { "epoch": 0.14652014652014653, "grad_norm": 1.1890255212783813, "learning_rate": 7.831269296751948e-06, "loss": 1.0446645736694335, "step": 30 }, { "epoch": 0.19536019536019536, "grad_norm": 1.1855751276016235, "learning_rate": 7.595806964341581e-06, "loss": 0.9899624824523926, "step": 40 }, { "epoch": 0.2442002442002442, "grad_norm": 1.4533610343933105, "learning_rate": 7.266254652228843e-06, "loss": 0.9951982498168945, "step": 50 }, { "epoch": 0.29304029304029305, "grad_norm": 1.264499545097351, "learning_rate": 6.851235618187317e-06, "loss": 0.9782312393188477, "step": 60 }, { "epoch": 0.3418803418803419, "grad_norm": 1.3089553117752075, "learning_rate": 6.36160949202369e-06, "loss": 0.9667717933654785, "step": 70 }, { "epoch": 0.3907203907203907, "grad_norm": 1.0916252136230469, "learning_rate": 5.810188116178156e-06, "loss": 0.9572884559631347, "step": 80 }, { "epoch": 0.43956043956043955, "grad_norm": 1.2539805173873901, "learning_rate": 5.211400303591802e-06, "loss": 0.9558270454406739, "step": 90 }, { "epoch": 0.4884004884004884, "grad_norm": 1.1267527341842651, "learning_rate": 4.580914284981961e-06, "loss": 0.9527605056762696, "step": 100 }, { "epoch": 0.5372405372405372, "grad_norm": 1.0940070152282715, "learning_rate": 3.935227724789994e-06, "loss": 0.9419396400451661, "step": 110 }, { "epoch": 0.5860805860805861, "grad_norm": 1.7783108949661255, "learning_rate": 3.2912360336831093e-06, "loss": 0.9319709777832031, "step": 120 }, { "epoch": 0.6349206349206349, "grad_norm": 1.1148947477340698, "learning_rate": 2.665790273396718e-06, "loss": 0.9522204399108887, "step": 130 }, { "epoch": 0.6837606837606838, "grad_norm": 1.0567108392715454, "learning_rate": 2.0752562220367795e-06, "loss": 0.9412946701049805, "step": 140 }, { "epoch": 0.7326007326007326, "grad_norm": 1.0418899059295654, "learning_rate": 1.5350861375962904e-06, "loss": 0.9191699981689453, "step": 150 }, { "epoch": 0.7814407814407814, "grad_norm": 0.9806021451950073, "learning_rate": 1.0594144251711994e-06, "loss": 0.9190822601318359, "step": 160 }, { "epoch": 0.8302808302808303, "grad_norm": 1.0123064517974854, "learning_rate": 6.606877878829161e-07, "loss": 0.9286371231079101, "step": 170 }, { "epoch": 0.8791208791208791, "grad_norm": 1.079588770866394, "learning_rate": 3.4933953919383984e-07, "loss": 0.9289794921875, "step": 180 }, { "epoch": 0.927960927960928, "grad_norm": 1.2275781631469727, "learning_rate": 1.3351659874955546e-07, "loss": 0.919953727722168, "step": 190 }, { "epoch": 0.9768009768009768, "grad_norm": 1.0596216917037964, "learning_rate": 1.8866315333544213e-08, "loss": 0.9213088989257813, "step": 200 }, { "epoch": 1.0, "step": 205, "total_flos": 2.640143661303595e+18, "train_loss": 0.9932206619076612, "train_runtime": 1797.9362, "train_samples_per_second": 7.286, "train_steps_per_second": 0.114 } ], "logging_steps": 10, "max_steps": 205, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.640143661303595e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }