22 lines
610 B
JSON
22 lines
610 B
JSON
{
  "model_id": "meta-llama/Llama-3.1-8B-Instruct",
  "model_save_name": "Llama-3.1-8B-Instruct-dragon-numbers-ft",
  "learning_rate": 0.0001,
  "num_train_epochs": 2,
  "per_device_train_batch_size": 12,
  "gradient_accumulation_steps": 1,
  "lora_rank": 8,
  "dataset_name": "eekay/Llama-3.1-8B-Instruct-dragon-numbers",
  "lora_alpha": 8,
  "train_attn": true,
  "lora_layers": null,
  "continue_final_message": true,
  "bf16": true,
  "max_grad_norm": 1.0,
  "n_examples": 30000,
  "logging_steps": 100,
  "lr_scheduler_type": "constant",
  "push_to_hub": true,
  "output_dir": null,
  "save_steps": null
}