From 74699877c92a92253eb63bac460b4e2b883d7013 Mon Sep 17 00:00:00 2001
From: jiangmengyu18 <56633611+jiangmengyu18@users.noreply.github.com>
Date: Thu, 2 Apr 2026 12:56:08 +0800
Subject: [PATCH] [v0.18.0][BugFix] fix the WeightsMapper bug of qwen3-vl (#7868)

### What this PR does / why we need it?
This PR fixes a weight-loading error in the Qwen3-VL model. The bug was
introduced by vLLM: in vLLM's `qwen3_vl.py`, the prefix of the `lm_head`
layer is hardcoded as `"lm_head"`, while `hf_to_vllm_mapper` remaps the
weight name of `lm_head` from `lm_head` to `language_model.lm_head`. The
keys in the weight file therefore no longer match the prefix of the
`lm_head` layer, and loading fails with an error.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
- [x] Ran the Qwen3-VL dense model with the fusion operator and verified
  correct output

Signed-off-by: betta18
Co-authored-by: betta18
---
 vllm_ascend/quantization/modelslim_config.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py
index 2e8d6212..da7e03e4 100644
--- a/vllm_ascend/quantization/modelslim_config.py
+++ b/vllm_ascend/quantization/modelslim_config.py
@@ -455,6 +455,10 @@ class AscendModelSlimConfig(QuantizationConfig):
             parts = parts[: exp_idx + 1]
             prefix = ".".join(parts)
 
+        # TODO: remove this when vLLM fixes the WeightsMapper bug of qwen3-vl.
+        if model_type in ["qwen3_vl"] and prefix == "lm_head":
+            prefix = "language_model.lm_head"
+
         if model_type in packed_modules_model_mapping:
             self.packed_modules_mapping = packed_modules_model_mapping[model_type]
             prefix = self.quant_prefix_mapper(model_type, prefix)
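
For reference, a minimal self-contained sketch of the key mismatch this patch works around. The helper functions below are illustrative stand-ins, not vLLM's actual `WeightsMapper` or quant-config code:

```python
# Simplified model of the bug and the workaround (hypothetical helpers,
# not vLLM's real API).

def hf_to_vllm_remap(name: str) -> str:
    # Mimics what Qwen3-VL's hf_to_vllm_mapper does to checkpoint keys:
    # the output layer gains the "language_model." prefix.
    if name.startswith("lm_head"):
        return "language_model." + name
    return name


def quant_prefix(model_type: str, prefix: str) -> str:
    # The workaround added by this patch: align the hardcoded "lm_head"
    # prefix with the remapped checkpoint key.
    if model_type in ["qwen3_vl"] and prefix == "lm_head":
        return "language_model.lm_head"
    return prefix


key = hf_to_vllm_remap("lm_head.weight")    # "language_model.lm_head.weight"
assert not key.startswith("lm_head.")       # old hardcoded prefix: no match -> load error
assert key.startswith(quant_prefix("qwen3_vl", "lm_head") + ".")  # patched prefix: match
```

The patch keeps the fix narrowly scoped to `model_type == "qwen3_vl"` and guarded by a TODO, so it can be deleted once vLLM corrects the hardcoded prefix upstream.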