{ "num_iterations": 1, "num_steps": 500, "batch_size": 16, "num_generations": 8, "max_completion_length": 400, "beta": 0.005, "learning_rate": 5e-06, "mu": 1, "epsilon": 0.1, "lambda_blend": 0.1, "temperature_m3po": 0.1, "use_m3po": true, "gating_type": "baseline", "gating_config": { "temperature": 0.1, "rank": 128, "attn_dim": 256, "init_strategy": "xavier", "debug": false }, "gating_warmup_steps": 50, "gating_lr": 0.0005, "gating_grad_clip": 1.0, "gradient_accumulation_steps": 4, "warmup_ratio": 0.1, "seed": 42, "trial_number": 1, "model_name": "Qwen/Qwen2.5-1.5B-Instruct" }