{ "base_model": "/home/coder/experiments/2026-04-12-017-tqa-dpo", "method": "OB_correction_DPO", "beta": 0.03, "lora_rank": 16, "lora_alpha": 32, "learning_rate": 3e-07, "epochs": 2, "batch_size": 4, "grad_accum": 2, "effective_batch_size": 8, "max_seq_length": 512, "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj" ], "train_time_seconds": 1212.3, "num_training_examples": 8847 }