319 lines
8.9 KiB
JSON
319 lines
8.9 KiB
JSON
{
|
|
"best_global_step": 40,
|
|
"best_metric": 0.29921636,
|
|
"best_model_checkpoint": "/data/home/scyb089/CODE/scripts/ms-swift/3b-new/v24-20250507-100951/checkpoint-40",
|
|
"epoch": 2.9305331179321485,
|
|
"eval_steps": 20,
|
|
"global_step": 114,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.025848142164781908,
|
|
"grad_norm": 2.496293067932129,
|
|
"learning_rate": 9.998101535124758e-06,
|
|
"loss": 0.3740294575691223,
|
|
"memory(GiB)": 27.73,
|
|
"step": 1,
|
|
"token_acc": 0.8633032214078745,
|
|
"train_speed(iter/s)": 0.068625
|
|
},
|
|
{
|
|
"epoch": 0.12924071082390953,
|
|
"grad_norm": 1.4705803394317627,
|
|
"learning_rate": 9.952610423187516e-06,
|
|
"loss": 0.3252382278442383,
|
|
"memory(GiB)": 29.52,
|
|
"step": 5,
|
|
"token_acc": 0.8930616812586399,
|
|
"train_speed(iter/s)": 0.123788
|
|
},
|
|
{
|
|
"epoch": 0.25848142164781907,
|
|
"grad_norm": 1.208221435546875,
|
|
"learning_rate": 9.811340001546252e-06,
|
|
"loss": 0.31452901363372804,
|
|
"memory(GiB)": 31.84,
|
|
"step": 10,
|
|
"token_acc": 0.9054922186029678,
|
|
"train_speed(iter/s)": 0.138172
|
|
},
|
|
{
|
|
"epoch": 0.3877221324717286,
|
|
"grad_norm": 0.9632206559181213,
|
|
"learning_rate": 9.578866633275289e-06,
|
|
"loss": 0.27907500267028806,
|
|
"memory(GiB)": 31.84,
|
|
"step": 15,
|
|
"token_acc": 0.9115719379194631,
|
|
"train_speed(iter/s)": 0.141744
|
|
},
|
|
{
|
|
"epoch": 0.5169628432956381,
|
|
"grad_norm": 0.8365996479988098,
|
|
"learning_rate": 9.259597044191635e-06,
|
|
"loss": 0.27438764572143554,
|
|
"memory(GiB)": 31.84,
|
|
"step": 20,
|
|
"token_acc": 0.9132246566726072,
|
|
"train_speed(iter/s)": 0.144692
|
|
},
|
|
{
|
|
"epoch": 0.5169628432956381,
|
|
"eval_loss": 0.3188421130180359,
|
|
"eval_runtime": 1.163,
|
|
"eval_samples_per_second": 21.496,
|
|
"eval_steps_per_second": 6.019,
|
|
"eval_token_acc": 0.9051328304362086,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.6462035541195477,
|
|
"grad_norm": 0.8677796125411987,
|
|
"learning_rate": 8.859583254581604e-06,
|
|
"loss": 0.26254222393035886,
|
|
"memory(GiB)": 31.84,
|
|
"step": 25,
|
|
"token_acc": 0.912092040385067,
|
|
"train_speed(iter/s)": 0.137482
|
|
},
|
|
{
|
|
"epoch": 0.7754442649434572,
|
|
"grad_norm": 0.7865857481956482,
|
|
"learning_rate": 8.386407858128707e-06,
|
|
"loss": 0.2681217908859253,
|
|
"memory(GiB)": 31.85,
|
|
"step": 30,
|
|
"token_acc": 0.8996108553551899,
|
|
"train_speed(iter/s)": 0.140721
|
|
},
|
|
{
|
|
"epoch": 0.9046849757673667,
|
|
"grad_norm": 0.7490976452827454,
|
|
"learning_rate": 7.849040287551331e-06,
|
|
"loss": 0.25801796913146974,
|
|
"memory(GiB)": 31.85,
|
|
"step": 35,
|
|
"token_acc": 0.9076438224453822,
|
|
"train_speed(iter/s)": 0.14257
|
|
},
|
|
{
|
|
"epoch": 1.0258481421647818,
|
|
"grad_norm": 1.092894434928894,
|
|
"learning_rate": 7.257666791554448e-06,
|
|
"loss": 0.27177181243896487,
|
|
"memory(GiB)": 31.85,
|
|
"step": 40,
|
|
"token_acc": 0.9122958797447821,
|
|
"train_speed(iter/s)": 0.145575
|
|
},
|
|
{
|
|
"epoch": 1.0258481421647818,
|
|
"eval_loss": 0.2992163598537445,
|
|
"eval_runtime": 1.1551,
|
|
"eval_samples_per_second": 21.643,
|
|
"eval_steps_per_second": 6.06,
|
|
"eval_token_acc": 0.9095605116431617,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 1.1550888529886914,
|
|
"grad_norm": 0.7573268413543701,
|
|
"learning_rate": 6.6234973460234184e-06,
|
|
"loss": 0.19469616413116456,
|
|
"memory(GiB)": 31.85,
|
|
"step": 45,
|
|
"token_acc": 0.9224711780868482,
|
|
"train_speed(iter/s)": 0.140248
|
|
},
|
|
{
|
|
"epoch": 1.284329563812601,
|
|
"grad_norm": 0.8084748387336731,
|
|
"learning_rate": 5.958553159618693e-06,
|
|
"loss": 0.1806863307952881,
|
|
"memory(GiB)": 31.85,
|
|
"step": 50,
|
|
"token_acc": 0.9398337785693084,
|
|
"train_speed(iter/s)": 0.141394
|
|
},
|
|
{
|
|
"epoch": 1.4135702746365104,
|
|
"grad_norm": 0.867699146270752,
|
|
"learning_rate": 5.275438801779328e-06,
|
|
"loss": 0.17451841831207277,
|
|
"memory(GiB)": 31.85,
|
|
"step": 55,
|
|
"token_acc": 0.9418562744768266,
|
|
"train_speed(iter/s)": 0.142869
|
|
},
|
|
{
|
|
"epoch": 1.5428109854604202,
|
|
"grad_norm": 0.663566529750824,
|
|
"learning_rate": 4.587103272638339e-06,
|
|
"loss": 0.17188454866409303,
|
|
"memory(GiB)": 31.85,
|
|
"step": 60,
|
|
"token_acc": 0.9462987886944818,
|
|
"train_speed(iter/s)": 0.144513
|
|
},
|
|
{
|
|
"epoch": 1.5428109854604202,
|
|
"eval_loss": 0.30524736642837524,
|
|
"eval_runtime": 1.157,
|
|
"eval_samples_per_second": 21.607,
|
|
"eval_steps_per_second": 6.05,
|
|
"eval_token_acc": 0.9102164644145622,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 1.6720516962843295,
|
|
"grad_norm": 0.7321382761001587,
|
|
"learning_rate": 3.906594543968122e-06,
|
|
"loss": 0.17072482109069825,
|
|
"memory(GiB)": 31.85,
|
|
"step": 65,
|
|
"token_acc": 0.9364743108441489,
|
|
"train_speed(iter/s)": 0.141054
|
|
},
|
|
{
|
|
"epoch": 1.8012924071082392,
|
|
"grad_norm": 0.7696079015731812,
|
|
"learning_rate": 3.2468122240362287e-06,
|
|
"loss": 0.1751842737197876,
|
|
"memory(GiB)": 31.85,
|
|
"step": 70,
|
|
"token_acc": 0.9407936548287872,
|
|
"train_speed(iter/s)": 0.142529
|
|
},
|
|
{
|
|
"epoch": 1.9305331179321485,
|
|
"grad_norm": 0.7385942339897156,
|
|
"learning_rate": 2.6202630348146323e-06,
|
|
"loss": 0.16539106369018555,
|
|
"memory(GiB)": 31.85,
|
|
"step": 75,
|
|
"token_acc": 0.9483229542226592,
|
|
"train_speed(iter/s)": 0.143018
|
|
},
|
|
{
|
|
"epoch": 2.0516962843295636,
|
|
"grad_norm": 0.7131247520446777,
|
|
"learning_rate": 2.0388237366751005e-06,
|
|
"loss": 0.1537123441696167,
|
|
"memory(GiB)": 31.85,
|
|
"step": 80,
|
|
"token_acc": 0.9502986451706891,
|
|
"train_speed(iter/s)": 0.14475
|
|
},
|
|
{
|
|
"epoch": 2.0516962843295636,
|
|
"eval_loss": 0.30163297057151794,
|
|
"eval_runtime": 1.1613,
|
|
"eval_samples_per_second": 21.528,
|
|
"eval_steps_per_second": 6.028,
|
|
"eval_token_acc": 0.9122663168251886,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 2.1809369951534734,
|
|
"grad_norm": 0.6918891668319702,
|
|
"learning_rate": 1.5135159945300232e-06,
|
|
"loss": 0.1329301118850708,
|
|
"memory(GiB)": 31.85,
|
|
"step": 85,
|
|
"token_acc": 0.9482836196172882,
|
|
"train_speed(iter/s)": 0.142599
|
|
},
|
|
{
|
|
"epoch": 2.3101777059773827,
|
|
"grad_norm": 0.6953617930412292,
|
|
"learning_rate": 1.0542974530180327e-06,
|
|
"loss": 0.13516383171081542,
|
|
"memory(GiB)": 31.85,
|
|
"step": 90,
|
|
"token_acc": 0.95539407490218,
|
|
"train_speed(iter/s)": 0.143367
|
|
},
|
|
{
|
|
"epoch": 2.4394184168012925,
|
|
"grad_norm": 0.6941922307014465,
|
|
"learning_rate": 6.698729810778065e-07,
|
|
"loss": 0.12188678979873657,
|
|
"memory(GiB)": 31.85,
|
|
"step": 95,
|
|
"token_acc": 0.9622745490981964,
|
|
"train_speed(iter/s)": 0.143797
|
|
},
|
|
{
|
|
"epoch": 2.568659127625202,
|
|
"grad_norm": 0.6733763217926025,
|
|
"learning_rate": 3.675296639259912e-07,
|
|
"loss": 0.12675585746765136,
|
|
"memory(GiB)": 31.85,
|
|
"step": 100,
|
|
"token_acc": 0.9584026622296173,
|
|
"train_speed(iter/s)": 0.144021
|
|
},
|
|
{
|
|
"epoch": 2.568659127625202,
|
|
"eval_loss": 0.31649884581565857,
|
|
"eval_runtime": 1.1563,
|
|
"eval_samples_per_second": 21.621,
|
|
"eval_steps_per_second": 6.054,
|
|
"eval_token_acc": 0.9116923581502132,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 2.6978998384491115,
|
|
"grad_norm": 0.6838268637657166,
|
|
"learning_rate": 1.5299867030334815e-07,
|
|
"loss": 0.11652226448059082,
|
|
"memory(GiB)": 31.85,
|
|
"step": 105,
|
|
"token_acc": 0.9495417438773978,
|
|
"train_speed(iter/s)": 0.142047
|
|
},
|
|
{
|
|
"epoch": 2.827140549273021,
|
|
"grad_norm": 0.7604183554649353,
|
|
"learning_rate": 3.034661341025258e-08,
|
|
"loss": 0.12787914276123047,
|
|
"memory(GiB)": 31.85,
|
|
"step": 110,
|
|
"token_acc": 0.9542654419448875,
|
|
"train_speed(iter/s)": 0.142691
|
|
},
|
|
{
|
|
"epoch": 2.9305331179321485,
|
|
"eval_loss": 0.31783103942871094,
|
|
"eval_runtime": 1.1534,
|
|
"eval_samples_per_second": 21.674,
|
|
"eval_steps_per_second": 6.069,
|
|
"eval_token_acc": 0.9116103640537881,
|
|
"step": 114
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 114,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 20,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.3995644452601856e+17,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|