Files
opd_math500_S-Qwen2.5-3B-In…/run_config.json
ModelHub XC 36f9dea2f8 初始化项目,由ModelHub XC社区提供模型
Model: QpiEImitation/opd_math500_S-Qwen2.5-3B-Instruct_T-Qwen2-7B-Instruct
Source: Original Platform
2026-06-16 06:53:16 +08:00

28 lines
790 B
JSON

{
"method": "opd",
"student_model": "Qwen/Qwen2.5-3B-Instruct",
"teacher_model": "Qwen/Qwen2-7B-Instruct",
"train_dataset": "EleutherAI/hendrycks_math/all",
"eval_dataset": "HuggingFaceH4/MATH-500",
"train_samples": 7500,
"eval_samples": 500,
"opd_mode": "expectation",
"trust_region": true,
"ppo_clip_eps": 0.2,
"use_correction": false,
"correction_alpha": 0.2,
"correction_lr": 0.001,
"num_inner_steps": 10,
"replay_buffer_size": 10,
"max_steps": 800,
"num_epochs": 1.0,
"lr": 2e-06,
"batch_size": 1,
"grad_accum": 8,
"output_dir": "opd_math500_S-Qwen2.5-3B-Instruct_T-Qwen2-7B-Instruct_expectation_tr_L10_buf10_20260420_154435",
"use_lora": false,
"lora_r": null,
"lora_alpha": null,
"lora_dropout": null,
"lora_target_modules": null
}