From 094eb0eff93d09a3a0804ad276381b187180cdce Mon Sep 17 00:00:00 2001
From: tanhaoan333
Date: Fri, 6 Mar 2026 17:24:22 +0800
Subject: [PATCH] [bugfix]Qwen-Omni quantization bugfix (#7042)

### What this PR does / why we need it?
[bugfix] Qwen-Omni quantization bugfix: fix the Qwen-Omni quantization
weight mapping to the float weights.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.16.0
- vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d

---------

Signed-off-by: tanhaoan333
---
 vllm_ascend/quantization/modelslim_config.py | 34 +++++++++++++++-----
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py
index 8684ed5d..3678c6f3 100644
--- a/vllm_ascend/quantization/modelslim_config.py
+++ b/vllm_ascend/quantization/modelslim_config.py
@@ -64,6 +64,19 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
         "mm_projector.linear_1": "mm_projector.proj.0",
         "mm_projector.linear_2": "mm_projector.proj.2",
     },
+    "qwen3_omni_moe": {
+        "language_model.lm_head.": "thinker.lm_head.",
+        "language_model.model.": "thinker.model.",
+        "visual.": "thinker.visual.",
+    },
+    "qwen2_5_omni": {
+        "language_model.lm_head.": "thinker.lm_head.",
+        "language_model.model.": "thinker.model.",
+        "visual.": "thinker.visual.",
+    },
+    "qwen2_5_omni_text": {
+        "language_model.": "thinker.",
+    },
 }
 
 # key: model_type
@@ -192,6 +205,11 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
             "k_proj",
             "v_proj",
         ],
+        "attn_qkv_proj": [
+            "attn_q_proj",
+            "attn_k_proj",
+            "attn_v_proj",
+        ],
         "gate_up_proj": [
             "gate_proj",
             "up_proj",
@@ -204,20 +222,20 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
             "k_proj",
             "v_proj",
         ],
-        "attn.qkv": [
-            "attn.q",
-            "attn.k",
-            "attn.v",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
+        "attn_qkv_proj": [
+            "attn_q_proj",
+            "attn_k_proj",
+            "attn_v_proj",
         ],
         "qkv": [
             "q",
             "k",
             "v",
         ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
     },
 }