{
  "lora_rank": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj"
  ],
  "learning_rate": 0.0002,
  "batch_size": 2,
  "gradient_accumulation_steps": 8,
  "epochs": 3,
  "max_length": 512
}