{ "best_global_step": 40, "best_metric": 0.29921636, "best_model_checkpoint": "/data/home/scyb089/CODE/scripts/ms-swift/3b-new/v24-20250507-100951/checkpoint-40", "epoch": 2.9305331179321485, "eval_steps": 20, "global_step": 114, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025848142164781908, "grad_norm": 2.496293067932129, "learning_rate": 9.998101535124758e-06, "loss": 0.3740294575691223, "memory(GiB)": 27.73, "step": 1, "token_acc": 0.8633032214078745, "train_speed(iter/s)": 0.068625 }, { "epoch": 0.12924071082390953, "grad_norm": 1.4705803394317627, "learning_rate": 9.952610423187516e-06, "loss": 0.3252382278442383, "memory(GiB)": 29.52, "step": 5, "token_acc": 0.8930616812586399, "train_speed(iter/s)": 0.123788 }, { "epoch": 0.25848142164781907, "grad_norm": 1.208221435546875, "learning_rate": 9.811340001546252e-06, "loss": 0.31452901363372804, "memory(GiB)": 31.84, "step": 10, "token_acc": 0.9054922186029678, "train_speed(iter/s)": 0.138172 }, { "epoch": 0.3877221324717286, "grad_norm": 0.9632206559181213, "learning_rate": 9.578866633275289e-06, "loss": 0.27907500267028806, "memory(GiB)": 31.84, "step": 15, "token_acc": 0.9115719379194631, "train_speed(iter/s)": 0.141744 }, { "epoch": 0.5169628432956381, "grad_norm": 0.8365996479988098, "learning_rate": 9.259597044191635e-06, "loss": 0.27438764572143554, "memory(GiB)": 31.84, "step": 20, "token_acc": 0.9132246566726072, "train_speed(iter/s)": 0.144692 }, { "epoch": 0.5169628432956381, "eval_loss": 0.3188421130180359, "eval_runtime": 1.163, "eval_samples_per_second": 21.496, "eval_steps_per_second": 6.019, "eval_token_acc": 0.9051328304362086, "step": 20 }, { "epoch": 0.6462035541195477, "grad_norm": 0.8677796125411987, "learning_rate": 8.859583254581604e-06, "loss": 0.26254222393035886, "memory(GiB)": 31.84, "step": 25, "token_acc": 0.912092040385067, "train_speed(iter/s)": 0.137482 }, { "epoch": 0.7754442649434572, "grad_norm": 0.7865857481956482, "learning_rate": 8.386407858128707e-06, "loss": 0.2681217908859253, "memory(GiB)": 31.85, "step": 30, "token_acc": 0.8996108553551899, "train_speed(iter/s)": 0.140721 }, { "epoch": 0.9046849757673667, "grad_norm": 0.7490976452827454, "learning_rate": 7.849040287551331e-06, "loss": 0.25801796913146974, "memory(GiB)": 31.85, "step": 35, "token_acc": 0.9076438224453822, "train_speed(iter/s)": 0.14257 }, { "epoch": 1.0258481421647818, "grad_norm": 1.092894434928894, "learning_rate": 7.257666791554448e-06, "loss": 0.27177181243896487, "memory(GiB)": 31.85, "step": 40, "token_acc": 0.9122958797447821, "train_speed(iter/s)": 0.145575 }, { "epoch": 1.0258481421647818, "eval_loss": 0.2992163598537445, "eval_runtime": 1.1551, "eval_samples_per_second": 21.643, "eval_steps_per_second": 6.06, "eval_token_acc": 0.9095605116431617, "step": 40 }, { "epoch": 1.1550888529886914, "grad_norm": 0.7573268413543701, "learning_rate": 6.6234973460234184e-06, "loss": 0.19469616413116456, "memory(GiB)": 31.85, "step": 45, "token_acc": 0.9224711780868482, "train_speed(iter/s)": 0.140248 }, { "epoch": 1.284329563812601, "grad_norm": 0.8084748387336731, "learning_rate": 5.958553159618693e-06, "loss": 0.1806863307952881, "memory(GiB)": 31.85, "step": 50, "token_acc": 0.9398337785693084, "train_speed(iter/s)": 0.141394 }, { "epoch": 1.4135702746365104, "grad_norm": 0.867699146270752, "learning_rate": 5.275438801779328e-06, "loss": 0.17451841831207277, "memory(GiB)": 31.85, "step": 55, "token_acc": 0.9418562744768266, "train_speed(iter/s)": 0.142869 }, { "epoch": 1.5428109854604202, "grad_norm": 0.663566529750824, "learning_rate": 4.587103272638339e-06, "loss": 0.17188454866409303, "memory(GiB)": 31.85, "step": 60, "token_acc": 0.9462987886944818, "train_speed(iter/s)": 0.144513 }, { "epoch": 1.5428109854604202, "eval_loss": 0.30524736642837524, "eval_runtime": 1.157, "eval_samples_per_second": 21.607, "eval_steps_per_second": 6.05, "eval_token_acc": 0.9102164644145622, "step": 60 }, { "epoch": 1.6720516962843295, "grad_norm": 0.7321382761001587, "learning_rate": 3.906594543968122e-06, "loss": 0.17072482109069825, "memory(GiB)": 31.85, "step": 65, "token_acc": 0.9364743108441489, "train_speed(iter/s)": 0.141054 }, { "epoch": 1.8012924071082392, "grad_norm": 0.7696079015731812, "learning_rate": 3.2468122240362287e-06, "loss": 0.1751842737197876, "memory(GiB)": 31.85, "step": 70, "token_acc": 0.9407936548287872, "train_speed(iter/s)": 0.142529 }, { "epoch": 1.9305331179321485, "grad_norm": 0.7385942339897156, "learning_rate": 2.6202630348146323e-06, "loss": 0.16539106369018555, "memory(GiB)": 31.85, "step": 75, "token_acc": 0.9483229542226592, "train_speed(iter/s)": 0.143018 }, { "epoch": 2.0516962843295636, "grad_norm": 0.7131247520446777, "learning_rate": 2.0388237366751005e-06, "loss": 0.1537123441696167, "memory(GiB)": 31.85, "step": 80, "token_acc": 0.9502986451706891, "train_speed(iter/s)": 0.14475 }, { "epoch": 2.0516962843295636, "eval_loss": 0.30163297057151794, "eval_runtime": 1.1613, "eval_samples_per_second": 21.528, "eval_steps_per_second": 6.028, "eval_token_acc": 0.9122663168251886, "step": 80 }, { "epoch": 2.1809369951534734, "grad_norm": 0.6918891668319702, "learning_rate": 1.5135159945300232e-06, "loss": 0.1329301118850708, "memory(GiB)": 31.85, "step": 85, "token_acc": 0.9482836196172882, "train_speed(iter/s)": 0.142599 }, { "epoch": 2.3101777059773827, "grad_norm": 0.6953617930412292, "learning_rate": 1.0542974530180327e-06, "loss": 0.13516383171081542, "memory(GiB)": 31.85, "step": 90, "token_acc": 0.95539407490218, "train_speed(iter/s)": 0.143367 }, { "epoch": 2.4394184168012925, "grad_norm": 0.6941922307014465, "learning_rate": 6.698729810778065e-07, "loss": 0.12188678979873657, "memory(GiB)": 31.85, "step": 95, "token_acc": 0.9622745490981964, "train_speed(iter/s)": 0.143797 }, { "epoch": 2.568659127625202, "grad_norm": 0.6733763217926025, "learning_rate": 3.675296639259912e-07, "loss": 0.12675585746765136, "memory(GiB)": 31.85, "step": 100, "token_acc": 0.9584026622296173, "train_speed(iter/s)": 0.144021 }, { "epoch": 2.568659127625202, "eval_loss": 0.31649884581565857, "eval_runtime": 1.1563, "eval_samples_per_second": 21.621, "eval_steps_per_second": 6.054, "eval_token_acc": 0.9116923581502132, "step": 100 }, { "epoch": 2.6978998384491115, "grad_norm": 0.6838268637657166, "learning_rate": 1.5299867030334815e-07, "loss": 0.11652226448059082, "memory(GiB)": 31.85, "step": 105, "token_acc": 0.9495417438773978, "train_speed(iter/s)": 0.142047 }, { "epoch": 2.827140549273021, "grad_norm": 0.7604183554649353, "learning_rate": 3.034661341025258e-08, "loss": 0.12787914276123047, "memory(GiB)": 31.85, "step": 110, "token_acc": 0.9542654419448875, "train_speed(iter/s)": 0.142691 }, { "epoch": 2.9305331179321485, "eval_loss": 0.31783103942871094, "eval_runtime": 1.1534, "eval_samples_per_second": 21.674, "eval_steps_per_second": 6.069, "eval_token_acc": 0.9116103640537881, "step": 114 } ], "logging_steps": 5, "max_steps": 114, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3995644452601856e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }