{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007692307692307693, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 3.1196, "step": 1 }, { "epoch": 0.038461538461538464, "grad_norm": Infinity, "learning_rate": 6.153846153846155e-06, "loss": 3.1223, "step": 5 }, { "epoch": 0.07692307692307693, "grad_norm": Infinity, "learning_rate": 1.3846153846153847e-05, "loss": 3.1246, "step": 10 }, { "epoch": 0.11538461538461539, "grad_norm": Infinity, "learning_rate": 1.9996395276708856e-05, "loss": 3.1264, "step": 15 }, { "epoch": 0.15384615384615385, "grad_norm": 35.01002883911133, "learning_rate": 1.9870502626379127e-05, "loss": 2.9981, "step": 20 }, { "epoch": 0.19230769230769232, "grad_norm": 11.861149787902832, "learning_rate": 1.9566964208274254e-05, "loss": 2.531, "step": 25 }, { "epoch": 0.23076923076923078, "grad_norm": 7.360829830169678, "learning_rate": 1.909124299802724e-05, "loss": 2.2014, "step": 30 }, { "epoch": 0.2692307692307692, "grad_norm": 6.061845779418945, "learning_rate": 1.845190085543795e-05, "loss": 2.0137, "step": 35 }, { "epoch": 0.3076923076923077, "grad_norm": 3.25486421585083, "learning_rate": 1.766044443118978e-05, "loss": 1.8515, "step": 40 }, { "epoch": 0.34615384615384615, "grad_norm": 2.8577518463134766, "learning_rate": 1.67311180742757e-05, "loss": 1.7909, "step": 45 }, { "epoch": 0.38461538461538464, "grad_norm": 3.2024171352386475, "learning_rate": 1.568064746731156e-05, "loss": 1.7104, "step": 50 }, { "epoch": 0.4230769230769231, "grad_norm": 2.368898630142212, "learning_rate": 1.4527938603696376e-05, "loss": 1.6393, "step": 55 }, { "epoch": 0.46153846153846156, "grad_norm": 2.016225814819336, "learning_rate": 1.3293737524320798e-05, "loss": 1.5979, "step": 60 }, { "epoch": 0.5, "grad_norm": 2.3714423179626465, "learning_rate": 1.2000256937760446e-05, "loss": 1.5492, "step": 65 }, { "epoch": 0.5384615384615384, "grad_norm": 2.853393793106079, "learning_rate": 1.0670776443910024e-05, "loss": 1.5275, "step": 70 }, { "epoch": 0.5769230769230769, "grad_norm": 1.7769665718078613, "learning_rate": 9.329223556089976e-06, "loss": 1.4923, "step": 75 }, { "epoch": 0.6153846153846154, "grad_norm": 2.4375193119049072, "learning_rate": 7.999743062239557e-06, "loss": 1.4681, "step": 80 }, { "epoch": 0.6538461538461539, "grad_norm": 2.583644151687622, "learning_rate": 6.706262475679205e-06, "loss": 1.4528, "step": 85 }, { "epoch": 0.6923076923076923, "grad_norm": 2.249284267425537, "learning_rate": 5.47206139630363e-06, "loss": 1.4174, "step": 90 }, { "epoch": 0.7307692307692307, "grad_norm": 2.0346739292144775, "learning_rate": 4.319352532688444e-06, "loss": 1.4044, "step": 95 }, { "epoch": 0.7692307692307693, "grad_norm": 1.5525047779083252, "learning_rate": 3.2688819257242963e-06, "loss": 1.3924, "step": 100 }, { "epoch": 0.7692307692307693, "eval_loss": 1.3881758451461792, "eval_runtime": 2.1626, "eval_samples_per_second": 413.847, "eval_steps_per_second": 3.237, "step": 100 }, { "epoch": 0.8076923076923077, "grad_norm": 1.5717474222183228, "learning_rate": 2.339555568810221e-06, "loss": 1.38, "step": 105 }, { "epoch": 0.8461538461538461, "grad_norm": 1.4260509014129639, "learning_rate": 1.5480991445620541e-06, "loss": 1.375, "step": 110 }, { "epoch": 0.8846153846153846, "grad_norm": 1.3539237976074219, "learning_rate": 9.08757001972762e-07, "loss": 1.3491, "step": 115 }, { "epoch": 0.9230769230769231, "grad_norm": 1.4586122035980225, "learning_rate": 4.3303579172574884e-07, "loss": 1.3533, "step": 120 }, { "epoch": 0.9615384615384616, "grad_norm": 1.3738539218902588, "learning_rate": 1.2949737362087156e-07, "loss": 1.3503, "step": 125 }, { "epoch": 1.0, "grad_norm": 1.3676426410675049, "learning_rate": 3.6047232911462506e-09, "loss": 1.3395, "step": 130 }, { "epoch": 1.0, "step": 130, "total_flos": 4.795466914988032e+16, "train_loss": 1.8291644793290358, "train_runtime": 404.0129, "train_samples_per_second": 40.88, "train_steps_per_second": 0.322 } ], "logging_steps": 5, "max_steps": 130, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.795466914988032e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }