From a5cb8e40f55cfff665dbe428cd7fd8c0d5f9a118 Mon Sep 17 00:00:00 2001
From: InSec <158599047+InSec@users.noreply.github.com>
Date: Mon, 15 Dec 2025 20:12:06 +0800
Subject: [PATCH] [doc] Modify quantization tutorials (#5026)

### What this PR does / why we need it?
Modify the quantization tutorials Qwen3-32B-W4A4.md and Qwen3-8B-W4A8.md
to correct a few mistakes:
- Qwen3-8B-W4A8: one idle NPU card needs to be set.
- Qwen3-32B-W4A4: two idle NPU cards need to be set for the flatquant
  training, and the calib_file path, which did not match the ModelSlim
  version, is corrected.
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?

- vLLM version: v0.12.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: IncSec <1790766300@qq.com>
---
 docs/source/tutorials/Qwen3-32B-W4A4.md | 4 +++-
 docs/source/tutorials/Qwen3-8B-W4A8.md  | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/source/tutorials/Qwen3-32B-W4A4.md b/docs/source/tutorials/Qwen3-32B-W4A4.md
index 6a03af84..ff7c252c 100644
--- a/docs/source/tutorials/Qwen3-32B-W4A4.md
+++ b/docs/source/tutorials/Qwen3-32B-W4A4.md
@@ -55,10 +55,12 @@ cd example/Qwen
 MODEL_PATH=/home/models/Qwen3-32B
 # Path to save converted weight, Replace with your local path
 SAVE_PATH=/home/models/Qwen3-32B-w4a4
+# Set two idle NPU cards
+export ASCEND_RT_VISIBLE_DEVICES=0,1
 
 python3 w4a4.py --model_path $MODEL_PATH \
                 --save_directory $SAVE_PATH \
-                --calib_file ../common/qwen_qwen3_cot_w4a4.json \
+                --calib_file ./calib_data/qwen3_cot_w4a4.json \
                 --trust_remote_code True \
                 --batch_size 1
 ```
diff --git a/docs/source/tutorials/Qwen3-8B-W4A8.md b/docs/source/tutorials/Qwen3-8B-W4A8.md
index cbfdd657..0b2be016 100644
--- a/docs/source/tutorials/Qwen3-8B-W4A8.md
+++ b/docs/source/tutorials/Qwen3-8B-W4A8.md
@@ -47,6 +47,8 @@ cd example/Qwen
 MODEL_PATH=/home/models/Qwen3-8B
 # Path to save converted weight, Replace with your local path
 SAVE_PATH=/home/models/Qwen3-8B-w4a8
+# Set an idle NPU card
+export ASCEND_RT_VISIBLE_DEVICES=0
 
 python quant_qwen.py \
           --model_path $MODEL_PATH \