281 lines
7.6 KiB
JSON
281 lines
7.6 KiB
JSON
{
|
|
"best_metric": 1.06145525,
|
|
"best_model_checkpoint": "/data/coding/ms-swift/output/v7-20250220-132503/checkpoint-108",
|
|
"epoch": 1.9829351535836177,
|
|
"eval_steps": 50,
|
|
"global_step": 108,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.01820250284414107,
|
|
"grad_norm": 6.1875,
|
|
"learning_rate": 1.6666666666666667e-06,
|
|
"loss": 1.5638728141784668,
|
|
"memory(GiB)": 29.73,
|
|
"step": 1,
|
|
"token_acc": 0.6939769707705934,
|
|
"train_speed(iter/s)": 0.129066
|
|
},
|
|
{
|
|
"epoch": 0.09101251422070535,
|
|
"grad_norm": 4.96875,
|
|
"learning_rate": 8.333333333333334e-06,
|
|
"loss": 1.518846035003662,
|
|
"memory(GiB)": 41.66,
|
|
"step": 5,
|
|
"token_acc": 0.7012233049968899,
|
|
"train_speed(iter/s)": 0.226379
|
|
},
|
|
{
|
|
"epoch": 0.1820250284414107,
|
|
"grad_norm": 3.515625,
|
|
"learning_rate": 9.96210254835968e-06,
|
|
"loss": 1.3327471733093261,
|
|
"memory(GiB)": 41.66,
|
|
"step": 10,
|
|
"token_acc": 0.7161527878935017,
|
|
"train_speed(iter/s)": 0.240583
|
|
},
|
|
{
|
|
"epoch": 0.27303754266211605,
|
|
"grad_norm": 2.296875,
|
|
"learning_rate": 9.809128215864096e-06,
|
|
"loss": 1.1619236946105957,
|
|
"memory(GiB)": 45.97,
|
|
"step": 15,
|
|
"token_acc": 0.7329187688216199,
|
|
"train_speed(iter/s)": 0.245364
|
|
},
|
|
{
|
|
"epoch": 0.3640500568828214,
|
|
"grad_norm": 2.296875,
|
|
"learning_rate": 9.542326359097619e-06,
|
|
"loss": 1.1952880859375,
|
|
"memory(GiB)": 45.97,
|
|
"step": 20,
|
|
"token_acc": 0.7218057637847742,
|
|
"train_speed(iter/s)": 0.252196
|
|
},
|
|
{
|
|
"epoch": 0.4550625711035267,
|
|
"grad_norm": 2.328125,
|
|
"learning_rate": 9.168011926105598e-06,
|
|
"loss": 1.111426544189453,
|
|
"memory(GiB)": 45.97,
|
|
"step": 25,
|
|
"token_acc": 0.7425333872925941,
|
|
"train_speed(iter/s)": 0.251882
|
|
},
|
|
{
|
|
"epoch": 0.5460750853242321,
|
|
"grad_norm": 2.578125,
|
|
"learning_rate": 8.695044586103297e-06,
|
|
"loss": 1.1071309089660644,
|
|
"memory(GiB)": 45.97,
|
|
"step": 30,
|
|
"token_acc": 0.7414093361083974,
|
|
"train_speed(iter/s)": 0.254082
|
|
},
|
|
{
|
|
"epoch": 0.6370875995449374,
|
|
"grad_norm": 2.078125,
|
|
"learning_rate": 8.134619029470535e-06,
|
|
"loss": 1.0020055770874023,
|
|
"memory(GiB)": 45.97,
|
|
"step": 35,
|
|
"token_acc": 0.7621102932675633,
|
|
"train_speed(iter/s)": 0.251894
|
|
},
|
|
{
|
|
"epoch": 0.7281001137656428,
|
|
"grad_norm": 2.359375,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 1.0203259468078614,
|
|
"memory(GiB)": 50.29,
|
|
"step": 40,
|
|
"token_acc": 0.7580885395117914,
|
|
"train_speed(iter/s)": 0.252412
|
|
},
|
|
{
|
|
"epoch": 0.8191126279863481,
|
|
"grad_norm": 2.21875,
|
|
"learning_rate": 6.806208330935766e-06,
|
|
"loss": 0.9908183097839356,
|
|
"memory(GiB)": 50.29,
|
|
"step": 45,
|
|
"token_acc": 0.7667093258473352,
|
|
"train_speed(iter/s)": 0.252266
|
|
},
|
|
{
|
|
"epoch": 0.9101251422070534,
|
|
"grad_norm": 2.5,
|
|
"learning_rate": 6.0696654160324875e-06,
|
|
"loss": 1.045759677886963,
|
|
"memory(GiB)": 50.29,
|
|
"step": 50,
|
|
"token_acc": 0.7470934799685781,
|
|
"train_speed(iter/s)": 0.251372
|
|
},
|
|
{
|
|
"epoch": 0.9101251422070534,
|
|
"eval_loss": 1.073840618133545,
|
|
"eval_runtime": 0.6301,
|
|
"eval_samples_per_second": 68.243,
|
|
"eval_steps_per_second": 14.283,
|
|
"eval_token_acc": 0.7637732857709076,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 1.018202502844141,
|
|
"grad_norm": 3.59375,
|
|
"learning_rate": 5.3078045306697154e-06,
|
|
"loss": 1.223165225982666,
|
|
"memory(GiB)": 57.52,
|
|
"step": 55,
|
|
"token_acc": 0.7527071344595012,
|
|
"train_speed(iter/s)": 0.246897
|
|
},
|
|
{
|
|
"epoch": 1.1092150170648465,
|
|
"grad_norm": 2.328125,
|
|
"learning_rate": 4.53865820268349e-06,
|
|
"loss": 1.0227657318115235,
|
|
"memory(GiB)": 57.52,
|
|
"step": 60,
|
|
"token_acc": 0.755286734276229,
|
|
"train_speed(iter/s)": 0.248832
|
|
},
|
|
{
|
|
"epoch": 1.2002275312855517,
|
|
"grad_norm": 2.03125,
|
|
"learning_rate": 3.7804313994581143e-06,
|
|
"loss": 0.9702803611755371,
|
|
"memory(GiB)": 57.52,
|
|
"step": 65,
|
|
"token_acc": 0.7658987281017519,
|
|
"train_speed(iter/s)": 0.248641
|
|
},
|
|
{
|
|
"epoch": 1.2912400455062572,
|
|
"grad_norm": 2.3125,
|
|
"learning_rate": 3.0510706335366034e-06,
|
|
"loss": 1.0110454559326172,
|
|
"memory(GiB)": 57.52,
|
|
"step": 70,
|
|
"token_acc": 0.7643304928863696,
|
|
"train_speed(iter/s)": 0.248241
|
|
},
|
|
{
|
|
"epoch": 1.3822525597269624,
|
|
"grad_norm": 2.390625,
|
|
"learning_rate": 2.3678391856132203e-06,
|
|
"loss": 0.9339286804199218,
|
|
"memory(GiB)": 57.52,
|
|
"step": 75,
|
|
"token_acc": 0.7785570747468379,
|
|
"train_speed(iter/s)": 0.247963
|
|
},
|
|
{
|
|
"epoch": 1.4732650739476678,
|
|
"grad_norm": 1.953125,
|
|
"learning_rate": 1.746908498978791e-06,
|
|
"loss": 0.9071330070495606,
|
|
"memory(GiB)": 57.52,
|
|
"step": 80,
|
|
"token_acc": 0.7775242441528808,
|
|
"train_speed(iter/s)": 0.248416
|
|
},
|
|
{
|
|
"epoch": 1.5642775881683733,
|
|
"grad_norm": 2.203125,
|
|
"learning_rate": 1.202975416726464e-06,
|
|
"loss": 1.0261162757873534,
|
|
"memory(GiB)": 57.52,
|
|
"step": 85,
|
|
"token_acc": 0.7509206426287888,
|
|
"train_speed(iter/s)": 0.248819
|
|
},
|
|
{
|
|
"epoch": 1.6552901023890785,
|
|
"grad_norm": 2.078125,
|
|
"learning_rate": 7.489143213519301e-07,
|
|
"loss": 0.995113468170166,
|
|
"memory(GiB)": 57.52,
|
|
"step": 90,
|
|
"token_acc": 0.7631283572516636,
|
|
"train_speed(iter/s)": 0.24959
|
|
},
|
|
{
|
|
"epoch": 1.7463026166097837,
|
|
"grad_norm": 2.1875,
|
|
"learning_rate": 3.9547241027523164e-07,
|
|
"loss": 0.9445444107055664,
|
|
"memory(GiB)": 57.52,
|
|
"step": 95,
|
|
"token_acc": 0.7700910688608404,
|
|
"train_speed(iter/s)": 0.249979
|
|
},
|
|
{
|
|
"epoch": 1.8373151308304891,
|
|
"grad_norm": 2.03125,
|
|
"learning_rate": 1.510153198249531e-07,
|
|
"loss": 0.9724701881408692,
|
|
"memory(GiB)": 57.52,
|
|
"step": 100,
|
|
"token_acc": 0.7649354027573051,
|
|
"train_speed(iter/s)": 0.250388
|
|
},
|
|
{
|
|
"epoch": 1.8373151308304891,
|
|
"eval_loss": 1.0617759227752686,
|
|
"eval_runtime": 0.6342,
|
|
"eval_samples_per_second": 67.805,
|
|
"eval_steps_per_second": 14.192,
|
|
"eval_token_acc": 0.7669441141498217,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 1.9283276450511946,
|
|
"grad_norm": 2.1875,
|
|
"learning_rate": 2.1329118524827662e-08,
|
|
"loss": 0.9729574203491211,
|
|
"memory(GiB)": 57.52,
|
|
"step": 105,
|
|
"token_acc": 0.7660818713450293,
|
|
"train_speed(iter/s)": 0.250386
|
|
},
|
|
{
|
|
"epoch": 1.9829351535836177,
|
|
"eval_loss": 1.061455249786377,
|
|
"eval_runtime": 0.6272,
|
|
"eval_samples_per_second": 68.563,
|
|
"eval_steps_per_second": 14.35,
|
|
"eval_token_acc": 0.7653586999603647,
|
|
"step": 108
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 108,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 2,
|
|
"save_steps": 5000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 3.19180283271168e+16,
|
|
"train_batch_size": 5,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|