{
  "base_model": "meta-llama/Llama-3.2-3B",
  "fine_tuning_type": "Full Parameter Fine-tuning",
  "dataset": "GSM8K",
  "num_train_samples": 7473,
  "batch_size": 2,
  "grad_accum": 4,
  "learning_rate": 1e-05,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "epochs": 3,
  "max_length": 512,
  "max_grad_norm": 1.0,
  "lr_scheduler_type": "cosine",
  "optimizer": "adamw_bnb_8bit",
  "gradient_checkpointing": true,
  "dtype": "bf16",
  "trainer_type": "Trainer"
}