Model: PKU-Alignment/ProgressGym-HistLlama3-8B-C020-pretrain-v0.2 Source: Original Platform
243 lines
6.1 KiB
JSON
243 lines
6.1 KiB
JSON
{
|
|
"best_metric": 1.9378653764724731,
|
|
"best_model_checkpoint": "./output/training_results/C020_Meta-Llama-3-8B_pretrain_20240726_033210/checkpoint-32778",
|
|
"epoch": 4.0,
|
|
"eval_steps": 3642,
|
|
"global_step": 36412,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.00010985389432055367,
|
|
"grad_norm": 0.0,
|
|
"learning_rate": 0.0,
|
|
"loss": 2.0915,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.20004394155772823,
|
|
"grad_norm": 1.880800447163735,
|
|
"learning_rate": 1.9904796777737092e-06,
|
|
"loss": 2.0663,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 0.40008788311545646,
|
|
"grad_norm": 1.91582729652135,
|
|
"learning_rate": 2.239936089625888e-06,
|
|
"loss": 2.007,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 0.40008788311545646,
|
|
"eval_loss": 1.9830611944198608,
|
|
"eval_runtime": 322.5516,
|
|
"eval_samples_per_second": 200.684,
|
|
"eval_steps_per_second": 1.569,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 0.6001318246731847,
|
|
"grad_norm": 1.8799884350488187,
|
|
"learning_rate": 1.2178786981582618e-06,
|
|
"loss": 1.9789,
|
|
"step": 5463
|
|
},
|
|
{
|
|
"epoch": 0.8001757662309129,
|
|
"grad_norm": 2.167713398694089,
|
|
"learning_rate": 6.502057578924368e-07,
|
|
"loss": 1.9635,
|
|
"step": 7284
|
|
},
|
|
{
|
|
"epoch": 0.8001757662309129,
|
|
"eval_loss": 1.9486902952194214,
|
|
"eval_runtime": 315.8318,
|
|
"eval_samples_per_second": 204.954,
|
|
"eval_steps_per_second": 1.602,
|
|
"step": 7284
|
|
},
|
|
{
|
|
"epoch": 1.000219707788641,
|
|
"grad_norm": 1.9185025050401976,
|
|
"learning_rate": 3.4539250265217177e-07,
|
|
"loss": 1.9518,
|
|
"step": 9105
|
|
},
|
|
{
|
|
"epoch": 1.2002636493463694,
|
|
"grad_norm": 1.8537277337784435,
|
|
"learning_rate": 1.8838570953925226e-07,
|
|
"loss": 1.8917,
|
|
"step": 10926
|
|
},
|
|
{
|
|
"epoch": 1.2002636493463694,
|
|
"eval_loss": 1.9423131942749023,
|
|
"eval_runtime": 315.8691,
|
|
"eval_samples_per_second": 204.93,
|
|
"eval_steps_per_second": 1.602,
|
|
"step": 10926
|
|
},
|
|
{
|
|
"epoch": 1.4003075909040976,
|
|
"grad_norm": 1.8472997131423734,
|
|
"learning_rate": 1.113186862393777e-07,
|
|
"loss": 1.8886,
|
|
"step": 12747
|
|
},
|
|
{
|
|
"epoch": 1.6003515324618258,
|
|
"grad_norm": 1.895662682121472,
|
|
"learning_rate": 7.547123756350748e-08,
|
|
"loss": 1.8884,
|
|
"step": 14568
|
|
},
|
|
{
|
|
"epoch": 1.6003515324618258,
|
|
"eval_loss": 1.9401631355285645,
|
|
"eval_runtime": 315.8338,
|
|
"eval_samples_per_second": 204.953,
|
|
"eval_steps_per_second": 1.602,
|
|
"step": 14568
|
|
},
|
|
{
|
|
"epoch": 1.800395474019554,
|
|
"grad_norm": 1.8596122602328744,
|
|
"learning_rate": 5.97978448731285e-08,
|
|
"loss": 1.891,
|
|
"step": 16389
|
|
},
|
|
{
|
|
"epoch": 2.000439415577282,
|
|
"grad_norm": 1.752487380981475,
|
|
"learning_rate": 5.343917594361068e-08,
|
|
"loss": 1.8872,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 2.000439415577282,
|
|
"eval_loss": 1.9390411376953125,
|
|
"eval_runtime": 316.1594,
|
|
"eval_samples_per_second": 204.742,
|
|
"eval_steps_per_second": 1.6,
|
|
"step": 18210
|
|
},
|
|
{
|
|
"epoch": 2.2004833571350106,
|
|
"grad_norm": 1.8941258580557327,
|
|
"learning_rate": 5.108265610728981e-08,
|
|
"loss": 1.8788,
|
|
"step": 20031
|
|
},
|
|
{
|
|
"epoch": 2.400527298692739,
|
|
"grad_norm": 1.8532314600998243,
|
|
"learning_rate": 5.0297076317689476e-08,
|
|
"loss": 1.8811,
|
|
"step": 21852
|
|
},
|
|
{
|
|
"epoch": 2.400527298692739,
|
|
"eval_loss": 1.9393320083618164,
|
|
"eval_runtime": 316.5674,
|
|
"eval_samples_per_second": 204.478,
|
|
"eval_steps_per_second": 1.598,
|
|
"step": 21852
|
|
},
|
|
{
|
|
"epoch": 2.600571240250467,
|
|
"grad_norm": 1.856636820022415,
|
|
"learning_rate": 5.006877574024932e-08,
|
|
"loss": 1.8807,
|
|
"step": 23673
|
|
},
|
|
{
|
|
"epoch": 2.8006151818081952,
|
|
"grad_norm": 1.8975919928137213,
|
|
"learning_rate": 5.001268969632882e-08,
|
|
"loss": 1.8782,
|
|
"step": 25494
|
|
},
|
|
{
|
|
"epoch": 2.8006151818081952,
|
|
"eval_loss": 1.938640832901001,
|
|
"eval_runtime": 316.1355,
|
|
"eval_samples_per_second": 204.757,
|
|
"eval_steps_per_second": 1.601,
|
|
"step": 25494
|
|
},
|
|
{
|
|
"epoch": 3.0006591233659234,
|
|
"grad_norm": 1.8638851197626498,
|
|
"learning_rate": 5.000172130703981e-08,
|
|
"loss": 1.8764,
|
|
"step": 27315
|
|
},
|
|
{
|
|
"epoch": 3.2007030649236516,
|
|
"grad_norm": 1.9182088121869934,
|
|
"learning_rate": 5.000014937976813e-08,
|
|
"loss": 1.8742,
|
|
"step": 29136
|
|
},
|
|
{
|
|
"epoch": 3.2007030649236516,
|
|
"eval_loss": 1.9384987354278564,
|
|
"eval_runtime": 316.0484,
|
|
"eval_samples_per_second": 204.814,
|
|
"eval_steps_per_second": 1.601,
|
|
"step": 29136
|
|
},
|
|
{
|
|
"epoch": 3.40074700648138,
|
|
"grad_norm": 1.9601402086429087,
|
|
"learning_rate": 5.000000644319432e-08,
|
|
"loss": 1.8775,
|
|
"step": 30957
|
|
},
|
|
{
|
|
"epoch": 3.600790948039108,
|
|
"grad_norm": 1.8139760695838174,
|
|
"learning_rate": 5.000000007721787e-08,
|
|
"loss": 1.8756,
|
|
"step": 32778
|
|
},
|
|
{
|
|
"epoch": 3.600790948039108,
|
|
"eval_loss": 1.9378653764724731,
|
|
"eval_runtime": 316.2121,
|
|
"eval_samples_per_second": 204.708,
|
|
"eval_steps_per_second": 1.6,
|
|
"step": 32778
|
|
},
|
|
{
|
|
"epoch": 3.8008348895968362,
|
|
"grad_norm": 1.9502862375669288,
|
|
"learning_rate": 5.000000000004247e-08,
|
|
"loss": 1.871,
|
|
"step": 34599
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"step": 36412,
|
|
"total_flos": 3807150279229440.0,
|
|
"train_loss": 1.9091134535458327,
|
|
"train_runtime": 51834.1311,
|
|
"train_samples_per_second": 44.957,
|
|
"train_steps_per_second": 0.702
|
|
}
|
|
],
|
|
"logging_steps": 1821,
|
|
"max_steps": 36412,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 4,
|
|
"save_steps": 3642,
|
|
"total_flos": 3807150279229440.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|