[bugfix] Qwen-Omni quantization model_type bugfix (#7007)

### What this PR does / why we need it?
Fixes the quantization `model_type` mappings for Qwen-Omni models: the `packed_modules_model_mapping` entry registered as `qwen3_omni_moe_text` is renamed to `qwen3_omni_moe` so the key matches the model type reported by the checkpoint config, a `qwen2_5_omni` entry is added, and the obsolete `qwen3_omni_moe_thinker` entry is removed from `QUANT_MODEL_PREFIX_MAPPINGS`.
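
Below is a minimal sketch of why the mapping key must equal the checkpoint's `model_type` (not code from this repo; the helper `get_packed_modules` is hypothetical): the table is keyed directly by that string, so an entry registered under the stale name `qwen3_omni_moe_text` is never found for a model whose config reports `qwen3_omni_moe`.

```python
packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
    "qwen3_omni_moe": {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
    },
}

def get_packed_modules(model_type: str) -> dict[str, list[str]]:
    # model_type comes from the model's config.json; the lookup only
    # succeeds if the table key matches it exactly.
    mapping = packed_modules_model_mapping.get(model_type)
    if mapping is None:
        raise KeyError(f"no packed-modules mapping for model_type={model_type!r}")
    return mapping

print(get_packed_modules("qwen3_omni_moe"))   # found after this fix
# get_packed_modules("qwen3_omni_moe_text")   # stale key: raises KeyError
```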
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.16.0
- vLLM main: 15d76f74e2

---------

Signed-off-by: tanhaoan333 <tanhaoan@huawei.com>
Commit: 1f2a083597
Parent: 1a7f845696
Author: tanhaoan333
Date: 2026-03-05 16:34:34 +08:00


@@ -64,13 +64,6 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
         "mm_projector.linear_1": "mm_projector.proj.0",
         "mm_projector.linear_2": "mm_projector.proj.2",
     },
-    "qwen3_omni_moe_thinker": {
-        "thinker.lm_head.": "language_model.lm_head.",
-        "thinker.model.": "language_model.model.",
-        "thinker.": "",
-        "lm_head.": "language_model.lm_head.",
-        "model.": "language_model.model.",
-    },
 }
 # key: model_type
@@ -193,7 +186,7 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
         ],
         "experts": ["experts.0.w1", "experts.0.w2", "experts.0.w3"],
     },
-    "qwen3_omni_moe_text": {
+    "qwen3_omni_moe": {
         "qkv_proj": [
             "q_proj",
             "k_proj",
@@ -205,6 +198,27 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
         ],
         "experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
     },
+    "qwen2_5_omni": {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "attn.qkv": [
+            "attn.q",
+            "attn.k",
+            "attn.v",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+        "qkv": [
+            "q",
+            "k",
+            "v",
+        ],
+    },
 }
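
For context, a prefix table like `QUANT_MODEL_PREFIX_MAPPINGS` is typically applied by rewriting quantized checkpoint weight names onto the names the runtime expects. The following is an illustrative sketch only (the key `some_model_type` and the helper `remap_weight_name` are made up for the example), matching longer prefixes first so overlapping entries such as `thinker.model.` and `thinker.` resolve deterministically.

```python
QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
    "some_model_type": {  # hypothetical key; the real table is keyed by model_type
        "mm_projector.linear_1": "mm_projector.proj.0",
        "mm_projector.linear_2": "mm_projector.proj.2",
    },
}

def remap_weight_name(model_type: str, name: str) -> str:
    mappings = QUANT_MODEL_PREFIX_MAPPINGS.get(model_type, {})
    # Try more specific (longer) prefixes first so overlapping entries
    # cannot shadow one another.
    for old, new in sorted(mappings.items(), key=lambda kv: -len(kv[0])):
        if name.startswith(old):
            return new + name[len(old):]
    return name

print(remap_weight_name("some_model_type", "mm_projector.linear_1.weight"))
# -> mm_projector.proj.0.weight
```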