初始化项目，由ModelHub XC社区提供模型

Model: stratosphere/qwen2.5-1.5b-slips-immune-risk Source: Original Platform
2026-04-27 04:10:46 +08:00
commit 51b1b8256f
8 changed files with 583 additions and 0 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -0,0 +1,125 @@
+# Qwen Fine-tuning Configuration with Unsloth
+# Risk analysis (cause + risk combined adapter) — Qwen2.5-1.5B, 4096 seq_len, 20GB VRAM
+
+# Model Configuration
+model:
+  model_name: "unsloth/Qwen2.5-1.5B-Instruct"  # Target deployment model (RPi5)
+  max_seq_length: 4096 # 3500 DAG tokens + 183 prompt overhead + ~400 response tokens
+  dtype: null  # Auto-detect best dtype
+  load_in_4bit: true  # QLoRA — 4-bit base model required for 20GB VRAM
+  device_map: "auto"  # Automatic device mapping
+
+  # LoRA Configuration
+  lora_r: 64  # Increased from 16 — wider subspace helps model learn synthesis over verbatim copying
+  lora_alpha: 64  # Keep equal to r with RSLoRA
+  lora_dropout: 0.0  # No dropout — small dataset, every gradient counts (quoted as 461 samples, ~174 optimizer steps; dataset.path lists 1328 train records — TODO confirm which is current)
+  lora_targets:  # Target modules for LoRA
+    - "q_proj"
+    - "k_proj"
+    - "v_proj"
+    - "o_proj"
+    - "gate_proj"
+    - "up_proj"
+    - "down_proj"
+  use_rslora: true  # Mandatory at r=64 to normalize gradient scaling
+  random_state: 42  # Random seed for reproducibility
+  loftq_config: null  # LoftQ configuration
+
+# Dataset Configuration
+dataset:
+  type: "local"  # Options: "huggingface", "local"
+  name: "mixed_dataset"  # Hugging Face dataset name (if type is huggingface)
+  path: "risk_combined_train_dataset.json"  # Combined interleaved cause+risk train split (1328 records)
+  eval_path: "risk_combined_eval_dataset.json"  # Combined interleaved cause+risk eval split (148 records)
+  split: "train"  # Dataset split to use
+  text_column: "messages"  # Column name containing conversations
+  use_chat_template: true  # Apply chat template formatting
+  dpo_train_path: "dpo_train_dataset.json"
+  dpo_eval_path:  "dpo_eval_dataset.json"
+
+# Training Configuration
+training:
+  mode: "sft"  # Options: "sft", "dpo", "orpo"
+
+  # Batch size and accumulation
+  per_device_train_batch_size: 1  # 4096 seq len (model.max_seq_length) requires batch=1 to avoid OOM
+  gradient_accumulation_steps: 16  # effective batch size = 16
+
+  # Learning rate and schedule
+  learning_rate: 0.00002  # 2e-5 — RSLoRA stability allows higher LR than r=16 config
+  lr_scheduler_type: "cosine"  # Learning rate scheduler
+  warmup_steps: 20  # 11% of ~174 steps
+  weight_decay: 0.01  # Weight decay
+
+  # Training duration
+  num_train_epochs: 3  # Number of training epochs
+  max_steps: -1  # Maximum training steps (-1 for full epochs)
+
+  # Precision and optimization
+  fp16: false  # Use BF16 instead (Ampere GPU assumed)
+  bf16: true   # BF16 — larger dynamic range than FP16, no overflow risk
+  optimizer: "adamw_8bit"  # 8-bit optimizer — keeps optimizer states within 20GB budget
+
+  # Logging and saving
+  logging_steps: 1  # Logging frequency
+  save_steps: 50  # Model save frequency
+  save_total_limit: 2  # Maximum number of saved checkpoints
+
+  # Output directory
+  output_dir: "./qwen_risk_finetuned"  # Output directory for model and checkpoints
+
+  # Data processing
+  dataset_num_proc: 2  # Number of processes for dataset processing
+  dataloader_num_workers: 0  # Number of dataloader workers
+  packing: false  # Must be false when using train_on_responses_only
+
+  # Reporting
+  report_to: []  # Reporting services (wandb, tensorboard, etc.)
+
+  # Model saving format
+  save_method: "merged_16bit"  # Options: "lora", "merged_16bit", "merged_4bit"
+  gguf_quantization: "q5_k_m"  # Also export GGUF for Ollama. Options: q4_k_m, q5_k_m, q8_0, f16. Set to null to skip.
+
+  # Reproducibility
+  seed: 42  # Random seed
+
+# DPO / ORPO Configuration
+dpo:
+  beta: 0.1             # KL penalty coefficient for DPO (standard starting point)
+  orpo_lambda: 0.1      # ORPO odds-ratio weight (same scale as DPO beta)
+  dpo_learning_rate: 0.00005  # 5e-5 — DPO is sensitive to overshooting. NOTE(review): this is higher than the SFT learning_rate (2e-5), not lower — confirm intended value
+
+# Weights & Biases Configuration
+use_wandb: false  # Enable W&B logging
+wandb:
+  project: "qwen-finetuning"  # W&B project name
+  run_name: "qwen-dpo-stage2"  # W&B run name
+  tags: ["qwen", "unsloth", "lora"]  # W&B tags
+
+# Hardware-specific configurations
+hardware:
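+  # Override profiles per GPU memory size. NOTE(review): "unsloth/Qwen1.5-3B" does not appear to match any released Qwen1.5 size — presumably "unsloth/Qwen2.5-3B" was intended; confirm before use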
+  gpu_16gb:
+    model_name: "unsloth/Qwen1.5-3B"
+    per_device_train_batch_size: 2
+    gradient_accumulation_steps: 4
+    max_seq_length: 2048
+
+  gpu_24gb:
+    model_name: "unsloth/Qwen1.5-3B"
+    per_device_train_batch_size: 4
+    gradient_accumulation_steps: 2
+    max_seq_length: 4096
+
+  gpu_40gb:
+    model_name: "unsloth/Qwen1.5-3B"
+    per_device_train_batch_size: 2
+    gradient_accumulation_steps: 4
+    max_seq_length: 4096
+
+# Evaluation Configuration (optional)
+evaluation:
+  eval_steps: 50  # Evaluation frequency (reduced for small dataset)
+  metric_for_best_model: "loss"  # Metric to track for best model
+  load_best_model_at_end: true  # Load best model at end of training
+  save_total_limit: 2  # Keep only 2 checkpoints to save disk