初始化项目，由ModelHub XC社区提供模型

Model: christinakopi/thinkprm-reproduced; Source: Original Platform
2026-04-25 13:24:12 +08:00
commit 0952fa0201
27 changed files with 999 additions and 0 deletions
--- a/run_config.json
+++ b/run_config.json
@@ -0,0 +1,28 @@
+{
+  "model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+  "repo_root": "/mnt/nlp/scratch/home/kopidaki/ThinkPRM/",
+  "hf_dataset": "launch/thinkprm-1K-verification-cots",
+  "hf_split": "train",
+  "eval_hf_split": null,
+  "eval_split_ratio": 0.0,
+  "output_dir": "/mnt/nlp/scratch/home/kopidaki/outputs/thinkprm-full-trl",
+  "num_train_epochs": 3.0,
+  "learning_rate": 6e-05,
+  "lr_scheduler_type": "constant",
+  "warmup_ratio": 0.0,
+  "per_device_train_batch_size": 1,
+  "per_device_eval_batch_size": 8,
+  "gradient_accumulation_steps": 8,
+  "max_length": 4096,
+  "dataloader_num_workers": 0,
+  "eval_strategy": "no",
+  "save_strategy": "steps",
+  "save_steps": 500,
+  "save_total_limit": null,
+  "logging_steps": 10,
+  "report_to": [
+    "wandb"
+  ],
+  "seed": 42,
+  "gradient_checkpointing": true
+}