diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py index f88a3a90..8684ed5d 100644 --- a/vllm_ascend/quantization/modelslim_config.py +++ b/vllm_ascend/quantization/modelslim_config.py @@ -64,13 +64,6 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = { "mm_projector.linear_1": "mm_projector.proj.0", "mm_projector.linear_2": "mm_projector.proj.2", }, - "qwen3_omni_moe_thinker": { - "thinker.lm_head.": "language_model.lm_head.", - "thinker.model.": "language_model.model.", - "thinker.": "", - "lm_head.": "language_model.lm_head.", - "model.": "language_model.model.", - }, } # key: model_type @@ -193,7 +186,7 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = { ], "experts": ["experts.0.w1", "experts.0.w2", "experts.0.w3"], }, - "qwen3_omni_moe_text": { + "qwen3_omni_moe": { "qkv_proj": [ "q_proj", "k_proj", @@ -205,6 +198,27 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = { ], "experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"], }, + "qwen2_5_omni": { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "attn.qkv": [ + "attn.q", + "attn.k", + "attn.v", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + "qkv": [ + "q", + "k", + "v", + ], + }, }