Files
qwen2.5-3b-numina-sft/run_config.json
ModelHub XC a070e334c7 初始化项目,由ModelHub XC社区提供模型
Model: christinakopi/qwen2.5-3b-numina-sft
Source: Original Platform
2026-04-23 10:32:09 +08:00

27 lines
745 B
JSON

{
  "model_name_or_path": "Qwen/Qwen2.5-3B",
  "hf_dataset": "christinakopi/numina_sft_19k",
  "hf_train_split": "train",
  "hf_eval_split": "validation",
  "messages_column": "messages",
  "output_dir": "/mnt/nlp/scratch/home/kopidaki/outputs/qwen2p5-3b-numina-sft",
  "num_train_epochs": 3.0,
  "learning_rate": 2e-05,
  "lr_scheduler_type": "cosine",
  "warmup_ratio": 0.03,
  "per_device_train_batch_size": 1,
  "per_device_eval_batch_size": 1,
  "gradient_accumulation_steps": 8,
  "max_length": 4096,
  "dataloader_num_workers": 0,
  "eval_strategy": "steps",
  "save_strategy": "steps",
  "save_steps": 200,
  "save_total_limit": 3,
  "logging_steps": 10,
  "report_to": ["wandb"],
  "seed": 42,
  "gradient_checkpointing": true
}