diff --git a/docs/source/tutorials/Qwen3-32B-W4A4.md b/docs/source/tutorials/Qwen3-32B-W4A4.md index 6a03af84..ff7c252c 100644 --- a/docs/source/tutorials/Qwen3-32B-W4A4.md +++ b/docs/source/tutorials/Qwen3-32B-W4A4.md @@ -55,10 +55,12 @@ cd example/Qwen MODEL_PATH=/home/models/Qwen3-32B # Path to save converted weight, Replace with your local path SAVE_PATH=/home/models/Qwen3-32B-w4a4 +# Select two idle NPU cards by ID, Replace 0,1 with the IDs that are free on your machine +export ASCEND_RT_VISIBLE_DEVICES=0,1 python3 w4a4.py --model_path $MODEL_PATH \ --save_directory $SAVE_PATH \ - --calib_file ../common/qwen_qwen3_cot_w4a4.json \ + --calib_file ./calib_data/qwen3_cot_w4a4.json \ --trust_remote_code True \ --batch_size 1 ``` diff --git a/docs/source/tutorials/Qwen3-8B-W4A8.md b/docs/source/tutorials/Qwen3-8B-W4A8.md index cbfdd657..0b2be016 100644 --- a/docs/source/tutorials/Qwen3-8B-W4A8.md +++ b/docs/source/tutorials/Qwen3-8B-W4A8.md @@ -47,6 +47,8 @@ cd example/Qwen MODEL_PATH=/home/models/Qwen3-8B # Path to save converted weight, Replace with your local path SAVE_PATH=/home/models/Qwen3-8B-w4a8 +# Select one idle NPU card by ID, Replace 0 with an ID that is free on your machine +export ASCEND_RT_VISIBLE_DEVICES=0 python quant_qwen.py \ --model_path $MODEL_PATH \