34 lines
733 B
JSON
34 lines
733 B
JSON
|
|
{
|
||
|
|
"num_iterations": 1,
|
||
|
|
"num_steps": 312,
|
||
|
|
"batch_size": 8,
|
||
|
|
"num_generations": 8,
|
||
|
|
"max_completion_length": 400,
|
||
|
|
"beta": 0.005,
|
||
|
|
"learning_rate": 5e-06,
|
||
|
|
"mu": 1,
|
||
|
|
"epsilon": 0.1,
|
||
|
|
"lambda_blend": 0.1,
|
||
|
|
"lambda_min": 0.01,
|
||
|
|
"lambda_max": 0.3,
|
||
|
|
"tau_H": 1.0,
|
||
|
|
"entropy_ema_decay": 0.95,
|
||
|
|
"temperature_m3po": 0.1,
|
||
|
|
"use_m3po": true,
|
||
|
|
"gating_type": "luong",
|
||
|
|
"gating_config": {
|
||
|
|
"temperature": 0.1,
|
||
|
|
"rank": 256,
|
||
|
|
"attn_dim": 256,
|
||
|
|
"init_strategy": "xavier",
|
||
|
|
"debug": false
|
||
|
|
},
|
||
|
|
"gating_warmup_steps": 50,
|
||
|
|
"gating_lr": 5e-06,
|
||
|
|
"gating_grad_clip": 1.0,
|
||
|
|
"gradient_accumulation_steps": 2,
|
||
|
|
"warmup_ratio": 0.1,
|
||
|
|
"seed": 123,
|
||
|
|
"trial_number": 1,
|
||
|
|
"model_name": "Qwen/Qwen2.5-1.5B-Instruct"
|
||
|
|
}
|