25 lines
518 B
JSON
25 lines
518 B
JSON
{
  "base_model": "mistralai/Mistral-7B-Instruct-v0.3",
  "lora_r": 64,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "lora_targets": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj"
  ],
  "learning_rate": 0.0002,
  "batch_size": 2,
  "grad_accum": 8,
  "epochs": 3,
  "max_seq_length": 1536,
  "train_examples": 8,
  "val_examples": 3,
  "final_train_loss": 1.2908076047897339,
  "training_time_minutes": 4.312238333333334,
  "timestamp": "20260317_234220"
}