初始化项目，由ModelHub XC社区提供模型

Model: QpiEImitation/opd_math500_S-Qwen2-0.5B-Instruct_T-Qwen2-7B-Instruct Source: Original Platform
2026-06-16 06:06:15 +08:00
commit 4e17d76798
10 changed files with 236 additions and 0 deletions
--- a/run_config.json
+++ b/run_config.json
@@ -0,0 +1,28 @@
+{
+  "method": "opd",
+  "student_model": "Qwen/Qwen2-0.5B-Instruct",
+  "teacher_model": "Qwen/Qwen2-7B-Instruct",
+  "train_dataset": "EleutherAI/hendrycks_math/all",
+  "eval_dataset": "HuggingFaceH4/MATH-500",
+  "train_samples": 7500,
+  "eval_samples": 500,
+  "opd_mode": "expectation",
+  "trust_region": true,
+  "ppo_clip_eps": 0.2,
+  "use_correction": false,
+  "correction_alpha": 0.2,
+  "correction_lr": 0.001,
+  "num_inner_steps": 10,
+  "replay_buffer_size": 10,
+  "max_steps": 800,
+  "num_epochs": 1.0,
+  "lr": 2e-06,
+  "batch_size": 1,
+  "grad_accum": 8,
+  "output_dir": "opd_math500_S-Qwen2-0.5B-Instruct_T-Qwen2-7B-Instruct_expectation_tr_L10_buf10_20260420_154437",
+  "use_lora": false,
+  "lora_r": null,
+  "lora_alpha": null,
+  "lora_dropout": null,
+  "lora_target_modules": null
+}