Model: PKU-Alignment/ProgressGym-HistLlama3-8B-C016-pretrain-v0.2 Source: Original Platform
243 lines
6.1 KiB
JSON
243 lines
6.1 KiB
JSON
{
|
|
"best_metric": 2.4261486530303955,
|
|
"best_model_checkpoint": "./output/training_results/C016_Meta-Llama-3-8B_pretrain_20240721_092214/checkpoint-11088",
|
|
"epoch": 4.0,
|
|
"eval_steps": 1232,
|
|
"global_step": 12316,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0003247807729782397,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.6721,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.20006495615459566,
|
|
"grad_norm": 1.9328745806142353,
|
|
"learning_rate": 1.9805194805194805e-06,
|
|
"loss": 2.592,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.4001299123091913,
|
|
"grad_norm": 2.0226924668887496,
|
|
"learning_rate": 2.245175689219919e-06,
|
|
"loss": 2.5057,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.4001299123091913,
|
|
"eval_loss": 2.4827427864074707,
|
|
"eval_runtime": 252.9711,
|
|
"eval_samples_per_second": 86.543,
|
|
"eval_steps_per_second": 0.68,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.6001948684637869,
|
|
"grad_norm": 1.8512197610266496,
|
|
"learning_rate": 1.2232016471327423e-06,
|
|
"loss": 2.4683,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 0.8002598246183826,
|
|
"grad_norm": 1.989781044650789,
|
|
"learning_rate": 6.53049308175953e-07,
|
|
"loss": 2.444,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 0.8002598246183826,
|
|
"eval_loss": 2.440027952194214,
|
|
"eval_runtime": 251.3004,
|
|
"eval_samples_per_second": 87.119,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 1.0003247807729783,
|
|
"grad_norm": 1.8956736763179278,
|
|
"learning_rate": 3.466185770829244e-07,
|
|
"loss": 2.4358,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 1.200389736927574,
|
|
"grad_norm": 1.9111043304334017,
|
|
"learning_rate": 1.8910445197889315e-07,
|
|
"loss": 2.3648,
|
|
"step": 3696
|
|
},
|
|
{
|
|
"epoch": 1.200389736927574,
|
|
"eval_loss": 2.431915760040283,
|
|
"eval_runtime": 251.1577,
|
|
"eval_samples_per_second": 87.168,
|
|
"eval_steps_per_second": 0.685,
|
|
"step": 3696
|
|
},
|
|
{
|
|
"epoch": 1.4004546930821695,
|
|
"grad_norm": 1.884967762986231,
|
|
"learning_rate": 1.1168237259086467e-07,
|
|
"loss": 2.3663,
|
|
"step": 4312
|
|
},
|
|
{
|
|
"epoch": 1.600519649236765,
|
|
"grad_norm": 1.9873340014256546,
|
|
"learning_rate": 7.563133304849047e-08,
|
|
"loss": 2.372,
|
|
"step": 4928
|
|
},
|
|
{
|
|
"epoch": 1.600519649236765,
|
|
"eval_loss": 2.4293837547302246,
|
|
"eval_runtime": 251.386,
|
|
"eval_samples_per_second": 87.089,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 4928
|
|
},
|
|
{
|
|
"epoch": 1.8005846053913608,
|
|
"grad_norm": 1.9124289441200373,
|
|
"learning_rate": 5.98689431836726e-08,
|
|
"loss": 2.3684,
|
|
"step": 5544
|
|
},
|
|
{
|
|
"epoch": 2.0006495615459565,
|
|
"grad_norm": 1.9479679838446426,
|
|
"learning_rate": 5.346405476547749e-08,
|
|
"loss": 2.3667,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 2.0006495615459565,
|
|
"eval_loss": 2.4281327724456787,
|
|
"eval_runtime": 251.4248,
|
|
"eval_samples_per_second": 87.076,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 6160
|
|
},
|
|
{
|
|
"epoch": 2.2007145177005523,
|
|
"grad_norm": 1.9022764488275758,
|
|
"learning_rate": 5.109115615383696e-08,
|
|
"loss": 2.3568,
|
|
"step": 6776
|
|
},
|
|
{
|
|
"epoch": 2.400779473855148,
|
|
"grad_norm": 1.885664698080457,
|
|
"learning_rate": 5.0300090028337e-08,
|
|
"loss": 2.3573,
|
|
"step": 7392
|
|
},
|
|
{
|
|
"epoch": 2.400779473855148,
|
|
"eval_loss": 2.4281272888183594,
|
|
"eval_runtime": 250.732,
|
|
"eval_samples_per_second": 87.316,
|
|
"eval_steps_per_second": 0.686,
|
|
"step": 7392
|
|
},
|
|
{
|
|
"epoch": 2.6008444300097433,
|
|
"grad_norm": 1.9790266878041507,
|
|
"learning_rate": 5.006932020966859e-08,
|
|
"loss": 2.3533,
|
|
"step": 8008
|
|
},
|
|
{
|
|
"epoch": 2.800909386164339,
|
|
"grad_norm": 1.9987792342132904,
|
|
"learning_rate": 5.0012816199435985e-08,
|
|
"loss": 2.3603,
|
|
"step": 8624
|
|
},
|
|
{
|
|
"epoch": 2.800909386164339,
|
|
"eval_loss": 2.4273290634155273,
|
|
"eval_runtime": 251.237,
|
|
"eval_samples_per_second": 87.141,
|
|
"eval_steps_per_second": 0.685,
|
|
"step": 8624
|
|
},
|
|
{
|
|
"epoch": 3.000974342318935,
|
|
"grad_norm": 1.892342522891005,
|
|
"learning_rate": 5.0001737227175665e-08,
|
|
"loss": 2.3575,
|
|
"step": 9240
|
|
},
|
|
{
|
|
"epoch": 3.2010392984735305,
|
|
"grad_norm": 1.945849916471603,
|
|
"learning_rate": 5.0000151498505686e-08,
|
|
"loss": 2.3522,
|
|
"step": 9856
|
|
},
|
|
{
|
|
"epoch": 3.2010392984735305,
|
|
"eval_loss": 2.4268627166748047,
|
|
"eval_runtime": 251.3708,
|
|
"eval_samples_per_second": 87.094,
|
|
"eval_steps_per_second": 0.684,
|
|
"step": 9856
|
|
},
|
|
{
|
|
"epoch": 3.401104254628126,
|
|
"grad_norm": 1.9950133710544578,
|
|
"learning_rate": 5.0000006522774745e-08,
|
|
"loss": 2.3532,
|
|
"step": 10472
|
|
},
|
|
{
|
|
"epoch": 3.6011692107827216,
|
|
"grad_norm": 1.9873010505050475,
|
|
"learning_rate": 5.000000007975414e-08,
|
|
"loss": 2.353,
|
|
"step": 11088
|
|
},
|
|
{
|
|
"epoch": 3.6011692107827216,
|
|
"eval_loss": 2.4261486530303955,
|
|
"eval_runtime": 252.6429,
|
|
"eval_samples_per_second": 86.656,
|
|
"eval_steps_per_second": 0.681,
|
|
"step": 11088
|
|
},
|
|
{
|
|
"epoch": 3.8012341669373173,
|
|
"grad_norm": 1.998167327050962,
|
|
"learning_rate": 5.0000000000044414e-08,
|
|
"loss": 2.3517,
|
|
"step": 11704
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 12316,
|
|
"total_flos": 1287266123120640.0,
|
|
"train_loss": 2.391189053524001,
|
|
"train_runtime": 40307.7885,
|
|
"train_samples_per_second": 19.553,
|
|
"train_steps_per_second": 0.306
|
|
}
|
|
],
|
|
"logging_steps": 616,
|
|
"max_steps": 12316,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 4,
|
|
"save_steps": 1232,
|
|
"total_flos": 1287266123120640.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|