Model: PKU-Alignment/ProgressGym-HistLlama3-8B-C017-pretrain-v0.2 Source: Original Platform
243 lines
6.1 KiB
JSON
243 lines
6.1 KiB
JSON
{
|
|
"best_metric": 2.4018473625183105,
|
|
"best_model_checkpoint": "./output/training_results/C017_random_sample_Meta-Llama-3-8B_pretrain_20240724/checkpoint-35802",
|
|
"epoch": 4.0,
|
|
"eval_steps": 3978,
|
|
"global_step": 39772,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00010057326762546515,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.7164,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.20004022930705018,
|
|
"grad_norm": 1.9595824499713421,
|
|
"learning_rate": 1.9943010392222595e-06,
|
|
"loss": 2.5584,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 0.40008045861410035,
|
|
"grad_norm": 1.8475727372215602,
|
|
"learning_rate": 2.237952084257793e-06,
|
|
"loss": 2.4822,
|
|
"step": 3978
|
|
},
|
|
{
|
|
"epoch": 0.40008045861410035,
|
|
"eval_loss": 2.457235813140869,
|
|
"eval_runtime": 809.2413,
|
|
"eval_samples_per_second": 87.369,
|
|
"eval_steps_per_second": 0.683,
|
|
"step": 3978
|
|
},
|
|
{
|
|
"epoch": 0.6001206879211506,
|
|
"grad_norm": 1.7662911861496735,
|
|
"learning_rate": 1.2175319826671093e-06,
|
|
"loss": 2.4419,
|
|
"step": 5967
|
|
},
|
|
{
|
|
"epoch": 0.8001609172282007,
|
|
"grad_norm": 1.7983814582083166,
|
|
"learning_rate": 6.499668127721108e-07,
|
|
"loss": 2.4215,
|
|
"step": 7956
|
|
},
|
|
{
|
|
"epoch": 0.8001609172282007,
|
|
"eval_loss": 2.4154767990112305,
|
|
"eval_runtime": 808.5614,
|
|
"eval_samples_per_second": 87.443,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 7956
|
|
},
|
|
{
|
|
"epoch": 1.000201146535251,
|
|
"grad_norm": 1.7969883421186013,
|
|
"learning_rate": 3.4535991044489427e-07,
|
|
"loss": 2.4128,
|
|
"step": 9945
|
|
},
|
|
{
|
|
"epoch": 1.2002413758423012,
|
|
"grad_norm": 1.8600898537077046,
|
|
"learning_rate": 1.8847567083143102e-07,
|
|
"loss": 2.3585,
|
|
"step": 11934
|
|
},
|
|
{
|
|
"epoch": 1.2002413758423012,
|
|
"eval_loss": 2.407348394393921,
|
|
"eval_runtime": 809.2343,
|
|
"eval_samples_per_second": 87.37,
|
|
"eval_steps_per_second": 0.683,
|
|
"step": 11934
|
|
},
|
|
{
|
|
"epoch": 1.4002816051493512,
|
|
"grad_norm": 1.9169506816386161,
|
|
"learning_rate": 1.1133168818945991e-07,
|
|
"loss": 2.3538,
|
|
"step": 13923
|
|
},
|
|
{
|
|
"epoch": 1.6003218344564014,
|
|
"grad_norm": 1.8761137775542636,
|
|
"learning_rate": 7.547427840102147e-08,
|
|
"loss": 2.3526,
|
|
"step": 15912
|
|
},
|
|
{
|
|
"epoch": 1.6003218344564014,
|
|
"eval_loss": 2.40483021736145,
|
|
"eval_runtime": 808.8539,
|
|
"eval_samples_per_second": 87.411,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 15912
|
|
},
|
|
{
|
|
"epoch": 1.8003620637634516,
|
|
"grad_norm": 1.761004624388482,
|
|
"learning_rate": 5.979347528334647e-08,
|
|
"loss": 2.3525,
|
|
"step": 17901
|
|
},
|
|
{
|
|
"epoch": 2.000402293070502,
|
|
"grad_norm": 1.813766837169378,
|
|
"learning_rate": 5.34373848297726e-08,
|
|
"loss": 2.3542,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 2.000402293070502,
|
|
"eval_loss": 2.40364408493042,
|
|
"eval_runtime": 808.719,
|
|
"eval_samples_per_second": 87.426,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 19890
|
|
},
|
|
{
|
|
"epoch": 2.200442522377552,
|
|
"grad_norm": 1.8622400969038313,
|
|
"learning_rate": 5.108120698150338e-08,
|
|
"loss": 2.3457,
|
|
"step": 21879
|
|
},
|
|
{
|
|
"epoch": 2.4004827516846023,
|
|
"grad_norm": 1.9759538842891853,
|
|
"learning_rate": 5.029702769777328e-08,
|
|
"loss": 2.3441,
|
|
"step": 23868
|
|
},
|
|
{
|
|
"epoch": 2.4004827516846023,
|
|
"eval_loss": 2.4035539627075195,
|
|
"eval_runtime": 809.1929,
|
|
"eval_samples_per_second": 87.375,
|
|
"eval_steps_per_second": 0.683,
|
|
"step": 23868
|
|
},
|
|
{
|
|
"epoch": 2.6005229809916526,
|
|
"grad_norm": 1.9777564770023575,
|
|
"learning_rate": 5.0068636563154646e-08,
|
|
"loss": 2.3446,
|
|
"step": 25857
|
|
},
|
|
{
|
|
"epoch": 2.8005632102987024,
|
|
"grad_norm": 1.8192679060780033,
|
|
"learning_rate": 5.001264640771992e-08,
|
|
"loss": 2.3451,
|
|
"step": 27846
|
|
},
|
|
{
|
|
"epoch": 2.8005632102987024,
|
|
"eval_loss": 2.4028046131134033,
|
|
"eval_runtime": 808.0983,
|
|
"eval_samples_per_second": 87.493,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 27846
|
|
},
|
|
{
|
|
"epoch": 3.0006034396057526,
|
|
"grad_norm": 2.0137358755507666,
|
|
"learning_rate": 5.0001713989719166e-08,
|
|
"loss": 2.3443,
|
|
"step": 29835
|
|
},
|
|
{
|
|
"epoch": 3.200643668912803,
|
|
"grad_norm": 1.8381002615524122,
|
|
"learning_rate": 5.0000148985750135e-08,
|
|
"loss": 2.3415,
|
|
"step": 31824
|
|
},
|
|
{
|
|
"epoch": 3.200643668912803,
|
|
"eval_loss": 2.402500867843628,
|
|
"eval_runtime": 371.3559,
|
|
"eval_samples_per_second": 190.392,
|
|
"eval_steps_per_second": 1.489,
|
|
"step": 31824
|
|
},
|
|
{
|
|
"epoch": 3.400683898219853,
|
|
"grad_norm": 1.9296099508223659,
|
|
"learning_rate": 5.0000006382565474e-08,
|
|
"loss": 2.34,
|
|
"step": 33813
|
|
},
|
|
{
|
|
"epoch": 3.6007241275269033,
|
|
"grad_norm": 1.9423815954292634,
|
|
"learning_rate": 5.0000000076751155e-08,
|
|
"loss": 2.3406,
|
|
"step": 35802
|
|
},
|
|
{
|
|
"epoch": 3.6007241275269033,
|
|
"eval_loss": 2.4018473625183105,
|
|
"eval_runtime": 346.8193,
|
|
"eval_samples_per_second": 203.861,
|
|
"eval_steps_per_second": 1.594,
|
|
"step": 35802
|
|
},
|
|
{
|
|
"epoch": 3.8007643568339535,
|
|
"grad_norm": 1.8386633630301288,
|
|
"learning_rate": 5.0000000000041456e-08,
|
|
"loss": 2.3406,
|
|
"step": 37791
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 39772,
|
|
"total_flos": 4159012790599680.0,
|
|
"train_loss": 0.7021765150223461,
|
|
"train_runtime": 17437.0559,
|
|
"train_samples_per_second": 145.971,
|
|
"train_steps_per_second": 2.281
|
|
}
|
|
],
|
|
"logging_steps": 1989,
|
|
"max_steps": 39772,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 4,
|
|
"save_steps": 3978,
|
|
"total_flos": 4159012790599680.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|