[3/N][Refactor][Quantization] remove packed_modules_mapping from models (#3021)
### What this PR does / why we need it?
Several custom models in vllm-ascend define their own `packed_modules_mapping`, which prevents those model classes from staying identical to their vLLM community counterparts. This PR moves the custom `packed_modules_mapping` entries into the quantization `utils.py` (a rough sketch of the idea follows below); once that is done, some of the custom model classes can be removed.
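A minimal sketch of that direction, for orientation only — the table name, keys, and helper below are assumptions for illustration, not the actual vllm-ascend API. The idea is to keep one central table of packed-module mappings in the quantization utils, keyed by model type, and attach the mapping to the unmodified community model class when Ascend quantization is in use:

```python
# Hypothetical sketch -- not the actual vllm-ascend implementation.
# One central table in the quantization utils replaces the per-model
# class attributes removed by this PR.
ASCEND_PACKED_MODULES_MAPPING: dict[str, dict[str, list[str]]] = {
    "deepseek_v2": {
        "gate_up_proj": ["gate_proj", "up_proj"],
        "experts":
        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
    },
    "qwen3_moe": {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
        "experts":
        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
    },
}

def apply_packed_modules_mapping(model_cls, model_type: str) -> None:
    """Attach the Ascend mapping to an unmodified community model class."""
    mapping = ASCEND_PACKED_MODULES_MAPPING.get(model_type)
    if mapping:
        model_cls.packed_modules_mapping = dict(mapping)
```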
### Does this PR introduce _any_ user-facing change?
No, this is an internal refactor.
### How was this patch tested?
Tested by CI.
- vLLM version: v0.10.2
- vLLM main: 5089fd749c
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
```diff
@@ -180,14 +180,6 @@ class CustomDeepSeekMultiTokenPredictor(DeepSeekMultiTokenPredictor):
 
 
 class CustomDeepSeekMTP(DeepSeekMTP):
-    # NOTE 1. The quantized MTP layer of deepseek on the NPU is not quantized;
-    # NOTE 2. The description file generated by the current msmodelslim tool does not have
-    # MTP layer info. Please manually add it and set the value to FLOAT.
-    packed_modules_mapping = {
-        "gate_up_proj": ["gate_proj", "up_proj"],
-        "experts":
-        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         nn.Module.__init__(self)
```
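Since the NOTE is dropped together with the mapping, it is worth restating what it warned about: the description file generated by msmodelslim has no entries for the MTP layer, so they must be added by hand with the value `FLOAT` to keep that layer unquantized. A purely illustrative sketch of that manual fix — the key names and the layer index below are assumptions, not the real msmodelslim file format:

```python
# Purely illustrative -- the key names and layer index are assumptions,
# not the real msmodelslim file format. The idea from the NOTE: map each
# MTP-layer weight to "FLOAT" so the quantization pass leaves it untouched.
quant_description: dict[str, str] = {}  # loaded from the generated JSON in practice
quant_description.update({
    "model.layers.61.self_attn.q_proj.weight": "FLOAT",
    "model.layers.61.mlp.gate_proj.weight": "FLOAT",
    "model.layers.61.mlp.up_proj.weight": "FLOAT",
    "model.layers.61.mlp.down_proj.weight": "FLOAT",
})
```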
```diff
@@ -320,12 +320,6 @@ class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
 
 
 class CustomDeepseekV2ForCausalLM(DeepseekV2ForCausalLM):
-    # add `packed_modules_mapping` in `DeepseekV2ForCausalLM` to support weight merging
-    packed_modules_mapping = {
-        "gate_up_proj": ["gate_proj", "up_proj"],
-        "experts":
-        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         nn.Module.__init__(self)
```
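The removed comment says the mapping exists "to support weight merging". As background, here is a minimal sketch of that mechanism, simplified from vLLM's stacked-parameter loading pattern (this is not the actual loader code): at load time, a checkpoint weight name containing a shard name is rewritten to the packed module name, and the tensor is copied into that shard's slice of the fused parameter.

```python
# Minimal sketch of packed-module weight merging -- simplified, not the
# actual vLLM loader. Each shard name maps to (packed name, shard index).
from __future__ import annotations

packed_modules_mapping = {
    "gate_up_proj": ["gate_proj", "up_proj"],
}

def remap_weight_name(name: str) -> tuple[str, int] | None:
    """Rewrite a checkpoint weight name to its fused parameter, if packed."""
    for packed, shards in packed_modules_mapping.items():
        for shard_id, shard in enumerate(shards):
            if shard in name:
                return name.replace(shard, packed), shard_id
    return None  # not a packed weight; load as-is

# e.g. "mlp.gate_proj.weight" -> ("mlp.gate_up_proj.weight", 0)
print(remap_weight_name("mlp.gate_proj.weight"))
```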
```diff
@@ -491,17 +491,6 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
                         dummy_inputs=Qwen2_5_VLDummyInputsBuilder)
 class AscendQwen2_5_VLForConditionalGeneration(
         Qwen2_5_VLForConditionalGeneration):
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
```
```diff
@@ -318,19 +318,6 @@ class CustomQwen3MoeModel(Qwen3MoeModel):
 
 
 class CustomQwen3MoeForCausalLM(Qwen3MoeForCausalLM):
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
-        "experts":
-        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         nn.Module.__init__(self)
```
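One detail worth noting in the MoE mappings above: the `"experts"` entry lists only expert 0's sub-projections. Our reading — an assumption on our part, consistent with how per-layer quantization settings are typically resolved for fused expert tensors — is that expert 0 acts as a representative for every expert in the fused module. A hypothetical helper illustrating that convention (not actual vllm-ascend code):

```python
# Hypothetical helper illustrating the representative-expert convention;
# not actual vllm-ascend code. The fused "experts" module takes whatever
# quant type the expert-0 weights named in the mapping were given.
def fused_experts_quant_type(quant_description: dict[str, str],
                             prefix: str,
                             representatives: list[str]) -> str:
    types = {
        quant_description.get(f"{prefix}.{rep}.weight", "FLOAT")
        for rep in representatives
    }
    assert len(types) == 1, "expert shards must share one quant type"
    return types.pop()

reps = ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
desc = {f"model.layers.0.mlp.{r}.weight": "W8A8" for r in reps}
print(fused_experts_quant_type(desc, "model.layers.0.mlp", reps))  # W8A8
```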
```diff
@@ -1166,15 +1166,6 @@ class Qwen3NextModel(nn.Module):
 
 
 class Qwen3NextForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
                            MixtureOfExperts, IsHybrid):
-    packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
-        "gate_up_proj": ["gate_proj", "up_proj"],
-        "in_proj": ["in_proj_qkvz", "in_proj_ba"],
-    }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
```