{
  "model_name_or_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
  "repo_root": "/mnt/nlp/scratch/home/kopidaki/ThinkPRM/",
  "hf_dataset": "launch/thinkprm-1K-verification-cots",
  "hf_split": "train",
  "eval_hf_split": null,
  "eval_split_ratio": 0.0,
  "output_dir": "/mnt/nlp/scratch/home/kopidaki/outputs/thinkprm-full-trl",
  "num_train_epochs": 3.0,
  "learning_rate": 6e-05,
  "lr_scheduler_type": "constant",
  "warmup_ratio": 0.0,
  "per_device_train_batch_size": 1,
  "per_device_eval_batch_size": 8,
  "gradient_accumulation_steps": 8,
  "max_length": 4096,
  "dataloader_num_workers": 0,
  "eval_strategy": "no",
  "save_strategy": "steps",
  "save_steps": 500,
  "save_total_limit": null,
  "logging_steps": 10,
  "report_to": [
    "wandb"
  ],
  "seed": 42,
  "gradient_checkpointing": true
}