初始化项目,由ModelHub XC社区提供模型
Model: RedHatAI/Qwen2.5-0.5B-quantized.w8a8 Source: Original Platform
This commit is contained in:
39
quantize_qwen2.5_fp8.sh
Normal file
39
quantize_qwen2.5_fp8.sh
Normal file
@@ -0,0 +1,39 @@
|
||||
|
||||
#!/usr/bin/env bash
# Quantize the Qwen2.5 model family (base + Instruct variants, 0.5B–72B) to
# FP8 with llm-compressor, submitting one oneshot pipeline run per model.
#
# Requires: the ClearML virtualenv below, and the pipeline script under
# /cache/git/research/automation/pipelines/.
set -euo pipefail

# Activate the environment that provides ClearML and llm-compressor.
source ~/environments/clearml/bin/activate

# llm-compressor recipe: FP8 quantization of every Linear layer except the
# LM head, with an MSE observer for scale calibration.
# The quoted 'EOF' delimiter keeps the YAML literal (no shell expansion).
recipe_template=$(cat <<'EOF'
quant_stage:
  quant_modifiers:
    QuantizationModifier:
      ignore: ["lm_head"]
      scheme: FP8
      targets: ["Linear"]
      observer: "mse"
EOF
)

for size in 0.5B 1.5B 3B 7B 32B 72B; do
  for version in base instruct; do
    # Hugging Face repo naming: base models have no suffix, chat models
    # carry "-Instruct".
    if [[ "$version" == "base" ]]; then
      model="Qwen2.5-${size}"
    else
      model="Qwen2.5-${size}-Instruct"
    fi

    # Task prefix: model name with '.' replaced by '_' plus run parameters.
    # NOTE(review): "8196" here (and in --max-seq-len below) looks like a
    # typo for 8192, but the two are kept consistent with each other —
    # confirm the intended context length before changing either.
    prefix="${model//./_}__llm_compressor__calibration__mse__512__8196__damp01"

    # BUG FIX: the original passed --recipe "${recipe}", an undefined
    # variable — the heredoc above is stored in ${recipe_template}.
    # BUG FIX: the original tag "W4A16" contradicted the FP8 scheme in the
    # recipe and the FP8 project name; corrected to "FP8".
    python /cache/git/research/automation/pipelines/pipeline_llmcompressor_oneshot.py \
      --model-id "Qwen/${model}" \
      --project-name "LLM quantization - FP8/llmcompressor/Qwen2.5" \
      --task-prefix "$prefix" \
      --recipe "${recipe_template}" \
      --num-samples 512 \
      --max-seq-len 8196 \
      --tags "Qwen2.5" "FP8" "calibration" "$size" "MSE" "$version"
  done
done
|
||||
Reference in New Issue
Block a user