Files
20260411-190341-align-qwen-…/training_config.json

21 lines
436 B
JSON
Raw Permalink Normal View History

{
"base_model": "/home/coder/experiments/2026-04-12-017-tqa-dpo",
"method": "OB_correction_DPO",
"beta": 0.03,
"lora_rank": 16,
"lora_alpha": 32,
"learning_rate": 3e-07,
"epochs": 2,
"batch_size": 4,
"grad_accum": 2,
"effective_batch_size": 8,
"max_seq_length": 512,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj"
],
"train_time_seconds": 1212.3,
"num_training_examples": 8847
}