初始化项目,由ModelHub XC社区提供模型

Model: Alienpenguin10/M3PO-luong-trial1-seed123
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-01 02:31:12 +08:00
commit c252bf4f8e
13 changed files with 152048 additions and 0 deletions

34
training_config.json Normal file
View File

@@ -0,0 +1,34 @@
{
  "num_iterations": 1,
  "num_steps": 312,
  "batch_size": 8,
  "num_generations": 8,
  "max_completion_length": 400,
  "beta": 0.005,
  "learning_rate": 5e-06,
  "mu": 1,
  "epsilon": 0.1,
  "lambda_blend": 0.1,
  "lambda_min": 0.01,
  "lambda_max": 0.3,
  "tau_H": 1.0,
  "entropy_ema_decay": 0.95,
  "temperature_m3po": 0.1,
  "use_m3po": true,
  "gating_type": "luong",
  "gating_config": {
    "temperature": 0.1,
    "rank": 256,
    "attn_dim": 256,
    "init_strategy": "xavier",
    "debug": false
  },
  "gating_warmup_steps": 50,
  "gating_lr": 5e-06,
  "gating_grad_clip": 1.0,
  "gradient_accumulation_steps": 2,
  "warmup_ratio": 0.1,
  "seed": 123,
  "trial_number": 1,
  "model_name": "Qwen/Qwen2.5-1.5B-Instruct"
}