{ "num_iterations": 1, "num_steps": 312, "batch_size": 8, "num_generations": 8, "max_completion_length": 400, "beta": 0.005, "learning_rate": 5e-06, "mu": 1, "epsilon": 0.1, "lambda_blend": 0.1, "lambda_min": 0.01, "lambda_max": 0.3, "tau_H": 1.0, "entropy_ema_decay": 0.95, "temperature_m3po": 0.1, "use_m3po": true, "gating_type": "luong", "gating_config": { "temperature": 0.1, "rank": 256, "attn_dim": 256, "init_strategy": "xavier", "debug": false }, "gating_warmup_steps": 50, "gating_lr": 5e-06, "gating_grad_clip": 1.0, "gradient_accumulation_steps": 2, "warmup_ratio": 0.1, "seed": 123, "trial_number": 1, "model_name": "Qwen/Qwen2.5-1.5B-Instruct" }