{
  "model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
  "repo_root": "/mnt/nlp/scratch/home/kopidaki/ThinkPRM/",
  "hf_dataset": "launch/thinkprm-1K-verification-cots",
  "hf_split": "train",
  "eval_hf_split": null,
  "eval_split_ratio": 0.0,
  "output_dir": "/mnt/nlp/scratch/home/kopidaki/outputs/thinkprm-full-trl",
  "num_train_epochs": 3.0,
  "learning_rate": 6e-05,
  "lr_scheduler_type": "constant",
  "warmup_ratio": 0.0,
  "per_device_train_batch_size": 1,
  "per_device_eval_batch_size": 8,
  "gradient_accumulation_steps": 8,
  "max_length": 4096,
  "dataloader_num_workers": 0,
  "eval_strategy": "no",
  "save_strategy": "steps",
  "save_steps": 500,
  "save_total_limit": null,
  "logging_steps": 10,
  "report_to": [
    "wandb"
  ],
  "seed": 42,
  "gradient_checkpointing": true
}