初始化项目,由ModelHub XC社区提供模型
Model: Alienpenguin10/M3PO-luong-trial1-seed42 Source: Original Platform
This commit is contained in:
34
training_config.json
Normal file
34
training_config.json
Normal file
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"num_iterations": 1,
|
||||
"num_steps": 312,
|
||||
"batch_size": 8,
|
||||
"num_generations": 8,
|
||||
"max_completion_length": 400,
|
||||
"beta": 0.005,
|
||||
"learning_rate": 5e-06,
|
||||
"mu": 1,
|
||||
"epsilon": 0.1,
|
||||
"lambda_blend": 0.1,
|
||||
"lambda_min": 0.01,
|
||||
"lambda_max": 0.3,
|
||||
"tau_H": 1.0,
|
||||
"entropy_ema_decay": 0.95,
|
||||
"temperature_m3po": 0.1,
|
||||
"use_m3po": true,
|
||||
"gating_type": "luong",
|
||||
"gating_config": {
|
||||
"temperature": 0.1,
|
||||
"rank": 256,
|
||||
"attn_dim": 256,
|
||||
"init_strategy": "identity",
|
||||
"debug": false
|
||||
},
|
||||
"gating_warmup_steps": 50,
|
||||
"gating_lr": 5e-06,
|
||||
"gating_grad_clip": 1.0,
|
||||
"gradient_accumulation_steps": 2,
|
||||
"warmup_ratio": 0.1,
|
||||
"seed": 42,
|
||||
"trial_number": 1,
|
||||
"model_name": "Qwen/Qwen2.5-1.5B-Instruct"
|
||||
}
|
||||
Reference in New Issue
Block a user