[bugfix] Qwen-Omni quantization bugfix (#7042)

### What this PR does / why we need it?

[bugfix] Qwen-Omni quantization bugfix: fix the mapping of Qwen-Omni quantization weights to the float weights.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.16.0
- vLLM main: 4034c3d32e

---------

Signed-off-by: tanhaoan333 <tanhaoan@huawei.com>
2026-03-06 17:24:22 +08:00
parent a51d6366b9
commit 094eb0eff9
1 changed files with 26 additions and 8 deletions
--- a/vllm_ascend/quantization/modelslim_config.py
+++ b/vllm_ascend/quantization/modelslim_config.py
@@ -64,6 +64,19 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
        "mm_projector.linear_1": "mm_projector.proj.0",
        "mm_projector.linear_2": "mm_projector.proj.2",
    },
    "qwen3_omni_moe": {
        "language_model.lm_head.": "thinker.lm_head.",
        "language_model.model.": "thinker.model.",
        "visual.": "thinker.visual.",
    },
    "qwen2_5_omni": {
        "language_model.lm_head.": "thinker.lm_head.",
        "language_model.model.": "thinker.model.",
        "visual.": "thinker.visual.",
    },
    "qwen2_5_omni_text": {
        "language_model.": "thinker.",
    },
 }
 # key: model_type
@@ -192,6 +205,11 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
            "k_proj",
            "v_proj",
        ],
        "attn_qkv_proj": [
            "attn_q_proj",
            "attn_k_proj",
            "attn_v_proj",
        ],
        "gate_up_proj": [
            "gate_proj",
            "up_proj",
@@ -204,20 +222,20 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
            "k_proj",
            "v_proj",
        ],
-        "attn.qkv": [
+        "attn_qkv_proj": [
-            "attn.q",
+            "attn_q_proj",
-            "attn.k",
+            "attn_k_proj",
-            "attn.v",
+            "attn_v_proj",
        ],
        "gate_up_proj": [
            "gate_proj",
            "up_proj",
        ],
        "qkv": [
            "q",
            "k",
            "v",
        ],
        "gate_up_proj": [
            "gate_proj",
            "up_proj",
        ],
    },
 }