From 094eb0eff93d09a3a0804ad276381b187180cdce Mon Sep 17 00:00:00 2001
From: tanhaoan333
Date: Fri, 6 Mar 2026 17:24:22 +0800
Subject: [PATCH] [bugfix]Qwen-Omni quantization bugfix (#7042)

### What this PR does / why we need it?
[bugfix] Qwen-Omni quantization bugfix: fix the Qwen-Omni quantization
weight mapping to the float weights.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.16.0
- vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d

---------

Signed-off-by: tanhaoan333
---
 vllm_ascend/quantization/modelslim_config.py | 34 +++++++++++++++-----
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py
index 8684ed5d..3678c6f3 100644
--- a/vllm_ascend/quantization/modelslim_config.py
+++ b/vllm_ascend/quantization/modelslim_config.py
@@ -64,6 +64,19 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
         "mm_projector.linear_1": "mm_projector.proj.0",
         "mm_projector.linear_2": "mm_projector.proj.2",
     },
+    "qwen3_omni_moe": {
+        "language_model.lm_head.": "thinker.lm_head.",
+        "language_model.model.": "thinker.model.",
+        "visual.": "thinker.visual.",
+    },
+    "qwen2_5_omni": {
+        "language_model.lm_head.": "thinker.lm_head.",
+        "language_model.model.": "thinker.model.",
+        "visual.": "thinker.visual.",
+    },
+    "qwen2_5_omni_text": {
+        "language_model.": "thinker.",
+    },
 }
 
 # key: model_type
@@ -192,6 +205,11 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
             "k_proj",
             "v_proj",
         ],
+        "attn_qkv_proj": [
+            "attn_q_proj",
+            "attn_k_proj",
+            "attn_v_proj",
+        ],
         "gate_up_proj": [
             "gate_proj",
             "up_proj",
@@ -204,20 +222,20 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
             "k_proj",
             "v_proj",
         ],
-        "attn.qkv": [
-            "attn.q",
-            "attn.k",
-            "attn.v",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
+        "attn_qkv_proj": [
+            "attn_q_proj",
+            "attn_k_proj",
+            "attn_v_proj",
         ],
         "qkv": [
             "q",
             "k",
             "v",
         ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
     },
 }