From 176bfc36bc6685ea7692e53584e8aa84367bb02f Mon Sep 17 00:00:00 2001 From: shaopeng-666 Date: Thu, 22 Jan 2026 20:05:25 +0800 Subject: [PATCH] [BugFix] fix 3vl dense model load quant weight (#6100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? Fix the weight-loading error for quantized Qwen3VL dense models. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The quantized Qwen3VL model service initializes successfully. Inference requests are processed correctly, and valid responses are returned. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60 Signed-off-by: 李少鹏 --- vllm_ascend/quantization/quant_config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index 1490f394..d2802947 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -210,6 +210,11 @@ QUANT_MODEL_PREFIX_MAPPINGS = { "language_model.lm_head.": "lm_head.", "language_model.model.": "model.language_model.", }, + "qwen3_vl_text": { + "visual.": "model.visual.", + "language_model.lm_head.": "lm_head.", + "language_model.model.": "model.language_model.", + }, } packed_modules_model_mapping = {