{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 166, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.030211480362537766, "grad_norm": 0.21704457700252533, "learning_rate": 4.705882352941177e-06, "loss": 0.284, "step": 5 }, { "epoch": 0.06042296072507553, "grad_norm": 0.07700642943382263, "learning_rate": 1.0588235294117648e-05, "loss": 0.091, "step": 10 }, { "epoch": 0.09063444108761329, "grad_norm": 1.0004132986068726, "learning_rate": 1.647058823529412e-05, "loss": 0.0881, "step": 15 }, { "epoch": 0.12084592145015106, "grad_norm": 0.017268722876906395, "learning_rate": 1.9991110182465032e-05, "loss": 0.0856, "step": 20 }, { "epoch": 0.1510574018126888, "grad_norm": 0.06252593547105789, "learning_rate": 1.9891281165856876e-05, "loss": 0.0776, "step": 25 }, { "epoch": 0.18126888217522658, "grad_norm": 0.013158817775547504, "learning_rate": 1.968162302997659e-05, "loss": 0.0796, "step": 30 }, { "epoch": 0.21148036253776434, "grad_norm": 0.054746970534324646, "learning_rate": 1.9364463741042694e-05, "loss": 0.0775, "step": 35 }, { "epoch": 0.24169184290030213, "grad_norm": 0.027843380346894264, "learning_rate": 1.8943324918225495e-05, "loss": 0.0776, "step": 40 }, { "epoch": 0.2719033232628399, "grad_norm": 0.09170668572187424, "learning_rate": 1.8422882730893323e-05, "loss": 0.0778, "step": 45 }, { "epoch": 0.3021148036253776, "grad_norm": 0.049589045345783234, "learning_rate": 1.7808915976161364e-05, "loss": 0.0776, "step": 50 }, { "epoch": 0.3323262839879154, "grad_norm": 0.057375721633434296, "learning_rate": 1.710824191327075e-05, "loss": 0.0787, "step": 55 }, { "epoch": 0.36253776435045315, "grad_norm": 0.04218236356973648, "learning_rate": 1.632864056726917e-05, "loss": 0.079, "step": 60 }, { "epoch": 0.39274924471299094, "grad_norm": 0.1125708743929863, "learning_rate": 1.5478768342496872e-05, "loss": 0.0776, "step": 65 }, { "epoch": 0.4229607250755287, "grad_norm": 0.024594679474830627, "learning_rate": 1.4568061905081874e-05, "loss": 0.0779, "step": 70 }, { "epoch": 0.45317220543806647, "grad_norm": 0.01017661951482296, "learning_rate": 1.3606633401697557e-05, "loss": 0.0782, "step": 75 }, { "epoch": 0.48338368580060426, "grad_norm": 0.07011096179485321, "learning_rate": 1.2605158178034656e-05, "loss": 0.0791, "step": 80 }, { "epoch": 0.513595166163142, "grad_norm": 0.01498446986079216, "learning_rate": 1.157475624372018e-05, "loss": 0.0792, "step": 85 }, { "epoch": 0.5438066465256798, "grad_norm": 0.03184051066637039, "learning_rate": 1.0526868799852797e-05, "loss": 0.0779, "step": 90 }, { "epoch": 0.5740181268882175, "grad_norm": 0.078987717628479, "learning_rate": 9.473131200147205e-06, "loss": 0.0781, "step": 95 }, { "epoch": 0.6042296072507553, "grad_norm": 0.05952491611242294, "learning_rate": 8.425243756279824e-06, "loss": 0.0771, "step": 100 }, { "epoch": 0.6344410876132931, "grad_norm": 0.014677044935524464, "learning_rate": 7.394841821965345e-06, "loss": 0.0771, "step": 105 }, { "epoch": 0.6646525679758308, "grad_norm": 0.03106486238539219, "learning_rate": 6.3933665983024465e-06, "loss": 0.0776, "step": 110 }, { "epoch": 0.6948640483383686, "grad_norm": 0.03548077121376991, "learning_rate": 5.431938094918132e-06, "loss": 0.0767, "step": 115 }, { "epoch": 0.7250755287009063, "grad_norm": 0.02386642061173916, "learning_rate": 4.5212316575031325e-06, "loss": 0.0778, "step": 120 }, { "epoch": 0.7552870090634441, "grad_norm": 0.03368431329727173, "learning_rate": 3.6713594327308343e-06, "loss": 0.0776, "step": 125 }, { "epoch": 0.7854984894259819, "grad_norm": 0.016041960567235947, "learning_rate": 2.891758086729253e-06, "loss": 0.0769, "step": 130 }, { "epoch": 0.8157099697885196, "grad_norm": 0.03274780884385109, "learning_rate": 2.19108402383864e-06, "loss": 0.0768, "step": 135 }, { "epoch": 0.8459214501510574, "grad_norm": 0.03985007107257843, "learning_rate": 1.5771172691066793e-06, "loss": 0.0765, "step": 140 }, { "epoch": 0.8761329305135952, "grad_norm": 0.02575680799782276, "learning_rate": 1.0566750817745076e-06, "loss": 0.077, "step": 145 }, { "epoch": 0.9063444108761329, "grad_norm": 0.04101819917559624, "learning_rate": 6.355362589573078e-07, "loss": 0.0758, "step": 150 }, { "epoch": 0.9365558912386707, "grad_norm": 0.06996775418519974, "learning_rate": 3.1837697002341293e-07, "loss": 0.0775, "step": 155 }, { "epoch": 0.9667673716012085, "grad_norm": 0.007283793296664953, "learning_rate": 1.0871883414312778e-07, "loss": 0.0758, "step": 160 }, { "epoch": 0.9969788519637462, "grad_norm": 0.017074227333068848, "learning_rate": 8.889817534969425e-09, "loss": 0.0768, "step": 165 }, { "epoch": 1.0, "step": 166, "total_flos": 2.8024067250035098e+17, "train_loss": 0.08477670932749667, "train_runtime": 1255.2183, "train_samples_per_second": 16.86, "train_steps_per_second": 0.132 } ], "logging_steps": 5, "max_steps": 166, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8024067250035098e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }