From 176bfc36bc6685ea7692e53584e8aa84367bb02f Mon Sep 17 00:00:00 2001 From: shaopeng-666 Date: Thu, 22 Jan 2026 20:05:25 +0800 Subject: [PATCH] [BugFix] fix 3vl dense model load quant weight (#6100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? Fix the weight-loading error for quantized Qwen3VL dense models. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The quantized Qwen3VL model service initializes successfully. Inference requests are processed correctly, and valid responses are returned. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60 Signed-off-by: 李少鹏 --- vllm_ascend/quantization/quant_config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index 1490f394..d2802947 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -210,6 +210,11 @@ QUANT_MODEL_PREFIX_MAPPINGS = { "language_model.lm_head.": "lm_head.", "language_model.model.": "model.language_model.", }, + "qwen3_vl_text": { + "visual.": "model.visual.", + "language_model.lm_head.": "lm_head.", + "language_model.model.": "model.language_model.", + }, } packed_modules_model_mapping = {