[Misc] Clean up useless patch (#3320)

### What this PR does / why we need it?
1. Clean up v0.10.2 support in the unit tests and e2e tests.
2. Remove the v0.11.0 periodic job; we're on v0.11.0 now.
3. Remove the now-useless patches for DeepSeek V3.2; the same changes have
   already landed in vLLM (see the sketch below for one way to check this).
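
As an illustrative aside (not part of this PR): a minimal sketch, assuming vLLM >= v0.11.0 is installed and that `ModelConfig.is_deepseek_mla` is still exposed as a property, of how one could confirm the `deepseek_v32` handling now lives upstream, making the downstream patch redundant:

```python
import inspect

from vllm.config import ModelConfig

# Sketch: if the upstream property's source already mentions deepseek_v32,
# the downstream monkey-patch removed by this PR is redundant.
src = inspect.getsource(ModelConfig.is_deepseek_mla.fget)
assert "deepseek_v32" in src, "upstream vLLM lacks deepseek_v32 MLA support"
print("vLLM already recognizes deepseek_v32 as an MLA model")
```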
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2025-10-09 14:07:26 +08:00 (committed by GitHub)
Parent: a43e2f61e1
Commit: 1c5b302f0d

10 changed files with 29 additions and 412 deletions

@@ -1,87 +1,10 @@
import ast

import vllm.envs as envs
from transformers import PretrainedConfig
from vllm.config import ModelConfig
from vllm.config.speculative import SpeculativeConfig
from vllm.logger import logger

# mypy: ignore-errors


@property
def is_deepseek_mla(self: ModelConfig):
    if not hasattr(self.hf_text_config, "model_type"):
        return False
    elif self.hf_text_config.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp',
             'kimi_k2', 'longcat_flash', 'deepseek_v32'):
        return self.hf_text_config.kv_lora_rank is not None
    elif self.hf_text_config.model_type == 'eagle':
        # If the model is an EAGLE module, check the
        # underlying architecture instead.
        return self.hf_text_config.model.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'deepseek_v32') \
            and self.hf_text_config.kv_lora_rank is not None
    return False


@staticmethod
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
    if hf_config.model_type in ("deepseek_v3", "deepseek_v32"):
        hf_config.model_type = "deepseek_mtp"
    if hf_config.model_type == "deepseek_mtp":
        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
        hf_config.update({
            "n_predict": n_predict,
            "architectures": ["DeepSeekMTPModel"]
        })

    if hf_config.architectures[0] == "MiMoForCausalLM":
        hf_config.model_type = "mimo_mtp"
        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
        hf_config.update({
            "num_hidden_layers": 0,
            "n_predict": n_predict,
            "architectures": ["MiMoMTPModel"]
        })

    if hf_config.architectures[0] == "Glm4MoeForCausalLM":
        hf_config.model_type = "glm4_moe_mtp"
        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
        hf_config.update({
            "num_hidden_layers": 0,
            "n_predict": n_predict,
            "architectures": ["Glm4MoeMTPModel"]
        })

    if hf_config.model_type == "ernie4_5_moe":
        hf_config.model_type = "ernie_mtp"
    if hf_config.model_type == "ernie_mtp":
        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
        hf_config.update({
            "n_predict": n_predict,
            "architectures": ["ErnieMTPModel"]
        })

    if hf_config.model_type == "qwen3_next":
        hf_config.model_type = "qwen3_next_mtp"
    if hf_config.model_type == "qwen3_next_mtp":
        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
        hf_config.update({
            "n_predict": n_predict,
            "architectures": ["Qwen3NextMTP"]
        })

    if hf_config.model_type == "longcat_flash":
        hf_config.model_type = "longcat_flash_mtp"
        n_predict = getattr(hf_config, "num_nextn_predict_layers", 1)
        hf_config.update({
            "n_predict": n_predict,
            "architectures": ["LongCatFlashMTPModel"]
        })

    return hf_config


def __post_init__(self):
    # Note: "method" is a new parameter that helps to extend the
@@ -308,6 +231,4 @@ def __post_init__(self):
                self.draft_tensor_parallel_size))


ModelConfig.is_deepseek_mla = is_deepseek_mla
SpeculativeConfig.__post_init__ = __post_init__
SpeculativeConfig.hf_config_override = hf_config_override
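
For context, the removed module relied on import-time monkey-patching: the assignments at the end of the diff replace attributes on upstream vLLM classes, so every instance picks up the override. A minimal, self-contained sketch of that pattern, using hypothetical class and attribute names rather than vLLM's real ones:

```python
# Sketch of the monkey-patch pattern the removed module used
# (hypothetical names; not vLLM code).
class UpstreamConfig:
    @property
    def is_mla(self) -> bool:
        return False  # upstream default


@property
def patched_is_mla(self) -> bool:
    # Downstream override, e.g. recognizing an extra model type.
    return getattr(self, "model_type", None) == "deepseek_v32"


# Installed by plain attribute assignment, just like
# `ModelConfig.is_deepseek_mla = is_deepseek_mla` above.
UpstreamConfig.is_mla = patched_is_mla

cfg = UpstreamConfig()
cfg.model_type = "deepseek_v32"
assert cfg.is_mla  # the class-level patch applies to all instances
```

Once the equivalent logic exists upstream, such patches become dead weight that can silently shadow newer upstream behavior, which is why deleting them as soon as they land in vLLM is the right call.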