diff --git a/examples/external_online_dp/run_dp_template.sh b/examples/external_online_dp/run_dp_template.sh
index 0243ea3e..08f263ad 100644
--- a/examples/external_online_dp/run_dp_template.sh
+++ b/examples/external_online_dp/run_dp_template.sh
@@ -29,4 +29,4 @@ vllm serve model_path \
     --trust-remote-code \
     --gpu-memory-utilization 0.9 \
     --quantization ascend \
-    --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \
\ No newline at end of file
+    --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
diff --git a/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py b/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py
index 880b44ae..7a782258 100644
--- a/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py
+++ b/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py
@@ -74,10 +74,7 @@ async def test_models(model: str, mode: str) -> None:
         "VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS": "3600000"
     }
     additional_config: dict[str, Any] = {}
-    speculative_config = {
-        "num_speculative_tokens": 2,
-        "method": "deepseek_mtp"
-    }
+    speculative_config = {"num_speculative_tokens": 2, "method": "mtp"}
     compilation_config = {
         "cudagraph_capture_sizes": [56],
         "cudagraph_mode": "FULL_DECODE_ONLY"
diff --git a/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py b/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py
index 80157588..3776e49c 100644
--- a/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py
+++ b/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py
@@ -84,10 +84,7 @@ async def test_models(model: str) -> None:
         "chunked_prefill_for_mla": True,
         "enable_weight_nz_layout": True
     }
-    speculative_config = {
-        "num_speculative_tokens": 1,
-        "method": "deepseek_mtp"
-    }
+    speculative_config = {"num_speculative_tokens": 1, "method": "mtp"}
     server_args = [
         "--quantization", "ascend", "--data-parallel-size", "2",
         "--tensor-parallel-size", "8", "--enable-expert-parallel", "--port",
diff --git a/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py b/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py
index 35082edb..7a76a4a1 100644
--- a/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py
+++ b/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py
@@ -76,10 +76,7 @@ async def test_models(model: str, mode: str) -> None:
         "HCCL_BUFFSIZE": "1024",
         "PYTORCH_NPU_ALLOC_CONF": "expandable_segments:True"
     }
-    speculative_config = {
-        "num_speculative_tokens": 1,
-        "method": "deepseek_mtp"
-    }
+    speculative_config = {"num_speculative_tokens": 1, "method": "mtp"}
     additional_config = {
         "torchair_graph_config": {
             "enabled": True,
diff --git a/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py b/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py
index 6413aba0..3f504ae9 100644
--- a/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py
+++ b/tests/e2e/nightly/models/test_deepseek_r1_w8a8_eplb.py
@@ -62,10 +62,7 @@ async def test_models(model: str) -> None:
         "DISABLE_L2_CACHE": "1",
         "DYNAMIC_EPLB": "true",
     }
-    speculative_config = {
-        "num_speculative_tokens": 1,
-        "method": "deepseek_mtp"
-    }
+    speculative_config = {"num_speculative_tokens": 1, "method": "mtp"}
     compilation_config = {
         "cudagraph_capture_sizes": [24],
         "cudagraph_mode": "FULL_DECODE_ONLY"
diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml
index 7bfe3f5e..6754bdc8 100644
--- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml
+++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml
@@ -29,7 +29,7 @@ deployment:
       --trust-remote-code
       --quantization ascend
       --gpu-memory-utilization 0.9
-      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+      --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
       --additional-config '{"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'

    -
@@ -50,7 +50,7 @@ deployment:
      --trust-remote-code
      --quantization ascend
      --gpu-memory-utilization 0.9
-     --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+     --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
      --additional-config '{"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
 benchmarks:
   acc:
diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
index 01100f29..f672dde5 100644
--- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
+++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
@@ -30,7 +30,7 @@ deployment:
      --quantization ascend
      --gpu-memory-utilization 0.9
      --enforce-eager
-     --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+     --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
      --additional-config '{"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'

    -
@@ -52,6 +52,6 @@ deployment:
      --quantization ascend
      --gpu-memory-utilization 0.9
      --enforce-eager
-     --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+     --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
      --additional-config '{"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
 benchmarks:
diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml
index 6ca189c4..fd093735 100644
--- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml
+++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml
@@ -39,7 +39,7 @@ deployment:
      --max-num-batched-tokens 16384
      --trust-remote-code
      --gpu-memory-utilization 0.9
-     --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+     --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
      --kv-transfer-config
      '{"kv_connector": "LLMDataDistCMgrConnector",
      "kv_buffer_device": "npu",
@@ -69,7 +69,7 @@ deployment:
      --max-num-batched-tokens 16384
      --trust-remote-code
      --gpu-memory-utilization 0.9
-     --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+     --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
      --kv-transfer-config
      '{"kv_connector": "LLMDataDistCMgrConnector",
      "kv_buffer_device": "npu",
@@ -100,7 +100,7 @@ deployment:
      --max-num-batched-tokens 256
      --trust-remote-code
      --gpu-memory-utilization 0.9
-     --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+     --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}'
      --kv-transfer-config
      '{"kv_connector": "LLMDataDistCMgrConnector",
      "kv_buffer_device": "npu",
@@ -130,7 +130,7 @@ deployment:
      --max-num-batched-tokens 256
      --trust-remote-code
      --gpu-memory-utilization 0.9
'{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}' --kv-transfer-config '{"kv_connector": "LLMDataDistCMgrConnector", "kv_buffer_device": "npu", diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml index 37a024b9..8b7723f5 100644 --- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml +++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml @@ -38,7 +38,7 @@ deployment: --max-num-batched-tokens 16384 --trust-remote-code --gpu-memory-utilization 0.9 - --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}' --kv-transfer-config '{"kv_connector": "LLMDataDistCMgrConnector", "kv_buffer_device": "npu", @@ -68,7 +68,7 @@ deployment: --max-num-batched-tokens 16384 --trust-remote-code --gpu-memory-utilization 0.9 - --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}' --kv-transfer-config '{"kv_connector": "LLMDataDistCMgrConnector", "kv_buffer_device": "npu", @@ -99,7 +99,7 @@ deployment: --max-num-batched-tokens 256 --trust-remote-code --gpu-memory-utilization 0.9 - --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}' --kv-transfer-config '{"kv_connector": "LLMDataDistCMgrConnector", "kv_buffer_device": "npu", @@ -129,7 +129,7 @@ deployment: --max-num-batched-tokens 256 --trust-remote-code --gpu-memory-utilization 0.9 - --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' + --speculative-config '{"num_speculative_tokens": 1, "method":"mtp"}' --kv-transfer-config '{"kv_connector": "LLMDataDistCMgrConnector", "kv_buffer_device": "npu", diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py index 6b90ec36..99c7d51c 100644 --- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py @@ -56,7 +56,7 @@ def mtp_correctness(sampling_config: SamplingParams, enable_expert_parallel=True, speculative_config={ "method": - "deepseek_mtp", + "mtp", "num_speculative_tokens": num_speculative_tokens, "disable_padded_drafter_batch": diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py index d5096717..ddaeeab9 100644 --- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py +++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py @@ -58,7 +58,7 @@ def mtp_torchair_correctness( distributed_executor_backend="mp", enable_expert_parallel=True, speculative_config={ - "method": "deepseek_mtp", + "method": "mtp", "num_speculative_tokens": 1, }, enforce_eager=False, diff --git a/tests/ut/torchair/test_torchair_model_runner.py b/tests/ut/torchair/test_torchair_model_runner.py index a11726f2..bbbe82bb 100644 --- a/tests/ut/torchair/test_torchair_model_runner.py +++ b/tests/ut/torchair/test_torchair_model_runner.py @@ -21,7 +21,7 @@ class TestNPUTorchairModelRunner(PytestBase): runner.vllm_config = MagicMock(spec=VllmConfig) runner.speculative_config = MagicMock( - method="deepseek_mtp", + method="mtp", 
             num_speculative_tokens=4,
             disable_padded_drafter_batch=False)

diff --git a/tests/ut/torchair/test_torchair_mtp_proposer.py b/tests/ut/torchair/test_torchair_mtp_proposer.py
index 50745226..ec2dc425 100644
--- a/tests/ut/torchair/test_torchair_mtp_proposer.py
+++ b/tests/ut/torchair/test_torchair_mtp_proposer.py
@@ -19,7 +19,7 @@ class TestTorchairMtpProposer(PytestBase):
         vllm_config.speculative_config = MagicMock()
         vllm_config.speculative_config.draft_model_config = MagicMock()
         vllm_config.speculative_config.draft_model_config.dtype = torch.float16
-        vllm_config.speculative_config.method = "deepseek_mtp"
+        vllm_config.speculative_config.method = "mtp"
         vllm_config.speculative_config.num_speculative_tokens = 5
         vllm_config.load_config = MagicMock()
         cache_config = CacheConfig(block_size=16)
diff --git a/vllm_ascend/compilation/acl_graph.py b/vllm_ascend/compilation/acl_graph.py
index 025ff3c1..4ddc1d85 100644
--- a/vllm_ascend/compilation/acl_graph.py
+++ b/vllm_ascend/compilation/acl_graph.py
@@ -257,7 +257,7 @@ def update_mla_attn_params(update_stream, forward_context, runtime_shape,
      softmax_lse) = param
     seq_lens_list = forward_context.attn_metadata[
         key].decode.seq_lens_list
-    if speculative_config and speculative_config.method == "deepseek_mtp" \
+    if speculative_config and speculative_config.method == "mtp" \
         and not forward_context.is_mtp_model:
         actual_seq_lengths = forward_context.attn_metadata[
             key].decode.actual_seq_lengths_q
diff --git a/vllm_ascend/patch/platform/__init__.py b/vllm_ascend/patch/platform/__init__.py
index 60a54e51..26c4dc86 100644
--- a/vllm_ascend/patch/platform/__init__.py
+++ b/vllm_ascend/patch/platform/__init__.py
@@ -16,7 +16,6 @@
 import os

-import vllm_ascend.patch.platform.patch_config  # noqa
 import vllm_ascend.patch.platform.patch_distributed  # noqa
 import vllm_ascend.patch.platform.patch_ec_connector  # noqa
 import vllm_ascend.patch.platform.patch_mamba_config  # noqa
diff --git a/vllm_ascend/patch/platform/patch_config.py b/vllm_ascend/patch/platform/patch_config.py
deleted file mode 100644
index b798fda3..00000000
--- a/vllm_ascend/patch/platform/patch_config.py
+++ /dev/null
@@ -1,234 +0,0 @@
-import ast
-
-from vllm.config.speculative import SpeculativeConfig
-from vllm.logger import logger
-
-
-def __post_init__(self):
-
-    # Note: "method" is a new parameter that helps to extend the
-    # configuration of non-model-based proposers, and the "model" parameter
-    # will be used to set the draft model, eagle head, or additional weight
-    # when needed. If users do not specify "method", the speculative method
-    # will be detected automatically if possible. If the speculative method
-    # can not be detected, it will be considered as the "draft_model" by
-    # default.
-
-    if self.model is None and self.num_speculative_tokens is not None:
-        # TODO(Shangming): Refactor mtp configuration logic when supporting
-        if (self.target_model_config
-                and self.target_model_config.hf_text_config.model_type
-                in ("deepseek_v3", "deepseek_v32", "mimo", "ernie4_5_moe",
-                    "qwen3_next")):
-            # use the draft model from the same model:
-            self.model = self.target_model_config.model
-            # Align the quantization of draft model for cases such as
-            # --quantization fp8 with a bf16 checkpoint.
-            if not self.quantization:
-                self.quantization = self.target_model_config.quantization
-        elif self.method in ("ngram", "[ngram]"):
-            self.model = "ngram"
-        elif self.method == "suffix":
-            self.model = "suffix"
-        else:
-            raise ValueError("num_speculative_tokens was provided but without "
-                             "speculative model.")
-
-    # Automatically configure the method for ngram when "model" is used
-    # instead of "method"
-    if self.method is None and (self.model is not None
-                                and self.model in ("ngram", "[ngram]")):
-        self.method = "ngram"
-
-    if self.method in ("ngram", "[ngram]"):
-        # Unified to "ngram" internally
-        self.method = "ngram"
-        # Set default values if not provided
-        if (self.prompt_lookup_min is None and self.prompt_lookup_max is None):
-            # TODO(woosuk): Tune these values. They are arbitrarily chosen.
-            self.prompt_lookup_min = 5
-            self.prompt_lookup_max = 5
-        elif self.prompt_lookup_min is None:
-            assert self.prompt_lookup_max is not None
-            self.prompt_lookup_min = self.prompt_lookup_max
-        elif self.prompt_lookup_max is None:
-            assert self.prompt_lookup_min is not None
-            self.prompt_lookup_max = self.prompt_lookup_min
-
-        # Validate values
-        if self.prompt_lookup_min < 1:
-            raise ValueError(
-                f"prompt_lookup_min={self.prompt_lookup_min} must be > 0")
-        if self.prompt_lookup_max < 1:
-            raise ValueError(
-                f"prompt_lookup_max={self.prompt_lookup_max} must be > 0")
-        if self.prompt_lookup_min > self.prompt_lookup_max:
-            raise ValueError(
-                f"prompt_lookup_min={self.prompt_lookup_min} must "
-                f"be <= prompt_lookup_max={self.prompt_lookup_max}")
-
-        # TODO: current we still need extract vocab_size from target model
-        # config, in future, we may try refactor it out, and set
-        # draft related config as None here.
-        self.draft_model_config = self.target_model_config
-        self.draft_parallel_config = self.target_parallel_config
-    elif self.method == "suffix":
-        self.draft_model_config = self.target_model_config
-        self.draft_parallel_config = self.target_parallel_config
-        self._validate_suffix_decoding()
-    else:
-        self.prompt_lookup_max = 0
-        self.prompt_lookup_min = 0
-
-        if self.model is not None:
-            # TODO: Move this import to the top once `ModelConfig`
-            # lives in `vllm.config.model`.
-            from vllm.config import ModelConfig
-            self.draft_model_config = ModelConfig(
-                model=self.model,
-                runner="draft",
-                tokenizer=self.target_model_config.tokenizer,
-                tokenizer_mode=self.target_model_config.tokenizer_mode,
-                trust_remote_code=self.target_model_config.trust_remote_code,
-                allowed_local_media_path=self.target_model_config.
-                allowed_local_media_path,
-                allowed_media_domains=self.target_model_config.
-                allowed_media_domains,
-                dtype=self.target_model_config.dtype,
-                seed=self.target_model_config.seed,
-                revision=self.revision,
-                code_revision=self.code_revision,
-                tokenizer_revision=self.target_model_config.tokenizer_revision,
-                spec_target_max_model_len=self.target_model_config.
-                max_model_len,
-                quantization=self.quantization,
-                enforce_eager=self.target_model_config.enforce_eager,
-                max_logprobs=self.target_model_config.max_logprobs,
-                hf_overrides=SpeculativeConfig.hf_config_override,
-            )
-
-            # Automatically detect the method
-            if self.method in ('eagle', 'eagle3'):
-                pass
-            # examples:
-            # yuhuili/EAGLE-LLaMA3-Instruct-8B
-            # yuhuili/EAGLE3-LLaMA3.1-Instruct-8B
-            # AngelSlim/Qwen3-8B_eagle3
-            elif "eagle-" in self.draft_model_config.model.lower():
-                self.method = "eagle"
-            elif "eagle3" in self.draft_model_config.model.lower():
-                self.method = "eagle3"
-            elif self.draft_model_config.hf_config.model_type == "medusa":
-                self.method = "medusa"
-            elif (self.draft_model_config.hf_config.model_type ==
-                  "mlp_speculator"):
-                self.method = "mlp_speculator"
-            elif (self.draft_model_config.hf_config.model_type
-                  in ("deepseek_mtp", "mimo_mtp", "glm4_moe_mtp")):
-                self.method = "deepseek_mtp"
-                if self.num_speculative_tokens > 1:
-                    logger.warning(
-                        "All Deepseek MTP models only have " \
-                        "one layer. Might need some code changes " \
-                        "to support multiple layers."
-                    )
-            elif (self.draft_model_config.hf_config.model_type == "ernie_mtp"):
-                self.method = "ernie_mtp"
-                if self.num_speculative_tokens > 1:
-                    logger.warning(
-                        "All Ernie MTP models only have " \
-                        "one layer. Might need some code changes " \
-                        "to support multiple layers."
-                    )
-            elif (self.draft_model_config.hf_config.model_type ==
-                  "qwen3_next_mtp"):
-                self.method = "qwen3_next_mtp"
-                if self.num_speculative_tokens > 1:
-                    logger.warning(
-                        "All Qwen3Next MTP models only have " \
-                        "one layer. Might need some code changes " \
-                        "to support multiple layers."
-                    )
-            elif (self.draft_model_config.hf_config.model_type
-                  in ("longcat_flash_mtp")):
-                self.method = "longcat_flash_mtp"
-                if self.num_speculative_tokens > 1:
-                    logger.warning(
-                        "LongCat MTP models only have " \
-                        "one layer. Might need some code changes " \
-                        "to support multiple layers."
-                    )
-            else:
-                self.method = "draft_model"
-                raise NotImplementedError(
-                    "Speculative decoding with draft model is not "
-                    "supported yet. Please consider using other "
-                    "speculative decoding methods such as ngram, medusa, "
-                    "eagle, or deepseek_mtp.")
-
-            # Replace hf_config for EAGLE draft_model
-            if self.method in ("eagle", "eagle3"):
-                from vllm.transformers_utils.configs import SpeculatorsConfig
-                from vllm.transformers_utils.configs.eagle import EAGLEConfig
-
-                if isinstance(self.draft_model_config.hf_config,
-                              (EAGLEConfig, SpeculatorsConfig)):
-                    pass
-                else:
-                    eagle_config = EAGLEConfig(
-                        self.draft_model_config.hf_config,
-                        method=self.method,
-                        model_type="eagle")
-                    self.draft_model_config.hf_config = eagle_config
-
-            if (self.num_speculative_tokens is not None
-                    and hasattr(self.draft_model_config.hf_config,
-                                "num_lookahead_tokens")):
-                self.draft_model_config.hf_config.num_lookahead_tokens = \
-                    self.num_speculative_tokens
-
-            n_predict = getattr(self.draft_model_config.hf_config, "n_predict",
-                                None)
-            if n_predict is not None:
-                if self.num_speculative_tokens is None:
-                    # Default to max value defined in draft model config.
-                    self.num_speculative_tokens = n_predict
-                elif self.num_speculative_tokens > n_predict and \
-                        self.num_speculative_tokens % n_predict != 0:
-                    # Ensure divisibility for MTP module reuse.
-                    raise ValueError(
-                        f"num_speculative_tokens:{self.num_speculative_tokens}"
-                        f" must be divisible by {n_predict=}")
-
-            if self.speculative_token_tree is None:
-                # Generate chain of tokens.
-                self.speculative_token_tree = str([
-                    (i + 1) * (0, ) for i in range(self.num_speculative_tokens)
-                ])
-            else:
-                # Sort the token tree breadth-first.
-                tree_choices = ast.literal_eval(self.speculative_token_tree)
-                self.speculative_token_tree = str(
-                    sorted(tree_choices, key=lambda t: (len(t), t)))
-
-            self.draft_tensor_parallel_size = \
-                SpeculativeConfig._verify_and_get_draft_tp(
-                    self.target_parallel_config,
-                    self.draft_tensor_parallel_size,
-                    self.draft_model_config.hf_config
-                )
-
-            self.draft_model_config.max_model_len = (
-                SpeculativeConfig._maybe_override_draft_max_model_len(
-                    self.max_model_len,
-                    self.draft_model_config.max_model_len,
-                    self.target_model_config.max_model_len,
-                ))
-
-        self.draft_parallel_config = (
-            SpeculativeConfig.create_draft_parallel_config(
-                self.target_parallel_config,
-                self.draft_tensor_parallel_size))
-
-
-SpeculativeConfig.__post_init__ = __post_init__
diff --git a/vllm_ascend/spec_decode/__init__.py b/vllm_ascend/spec_decode/__init__.py
index a8d44875..50f65de7 100644
--- a/vllm_ascend/spec_decode/__init__.py
+++ b/vllm_ascend/spec_decode/__init__.py
@@ -32,7 +32,7 @@ def get_spec_decode_method(method,
         return NgramProposer(vllm_config, device, runner)
     elif method in ("eagle", "eagle3"):
         return EagleProposer(vllm_config, device, runner)
-    elif method in ('deepseek_mtp', 'qwen3_next_mtp'):
+    elif method == "mtp":
         if is_torchair_graph:
             return TorchairMtpProposer(vllm_config, device, runner)
         return MtpProposer(vllm_config, device, runner)
diff --git a/vllm_ascend/torchair/torchair_mla.py b/vllm_ascend/torchair/torchair_mla.py
index b1ed979c..5846bbd2 100644
--- a/vllm_ascend/torchair/torchair_mla.py
+++ b/vllm_ascend/torchair/torchair_mla.py
@@ -317,7 +317,7 @@ class AscendMLATorchairMetadataBuilder:
                 dtype=self.model_config.dtype,
                 device=device)
             if self.vllm_config.speculative_config is not None and\
-                    self.vllm_config.speculative_config.method == 'deepseek_mtp':
+                    self.vllm_config.speculative_config.method == 'mtp':
                 attn_state = AscendAttentionState.SpecDecoding
                 num_decode_tokens = 2
             else:
diff --git a/vllm_ascend/torchair/torchair_model_runner.py b/vllm_ascend/torchair/torchair_model_runner.py
index d7c55c6e..012183e2 100644
--- a/vllm_ascend/torchair/torchair_model_runner.py
+++ b/vllm_ascend/torchair/torchair_model_runner.py
@@ -501,7 +501,7 @@ class NPUTorchairModelRunner(NPUModelRunner):
     def update_torchair_graph_batch_sizes(self):
         # return graph_batch_sizes according to the max number of tokens
         # first pad according to the number of requests
-        if self.is_kv_consumer and self.speculative_config and self.speculative_config.method == 'deepseek_mtp':
+        if self.is_kv_consumer and self.speculative_config and self.speculative_config.method == 'mtp':
             # pd disaggregation scenario may incorrectly calculate the batch in mtp scenario, so we force set it to max_num_reqs
             self.torchair_graph_batch_sizes = [self.max_num_reqs]
             logger.warning(
diff --git a/vllm_ascend/torchair/torchair_sfa.py b/vllm_ascend/torchair/torchair_sfa.py
index 7e1fe325..19e88017 100644
--- a/vllm_ascend/torchair/torchair_sfa.py
+++ b/vllm_ascend/torchair/torchair_sfa.py
@@ -319,7 +319,7 @@ class AscendSFATorchairMetadataBuilder:
                 device=device)

             if self.vllm_config.speculative_config is not None and\
-                    self.vllm_config.speculative_config.method == 'deepseek_mtp':
+                    self.vllm_config.speculative_config.method == 'mtp':
                 attn_state = AscendAttentionState.SpecDecoding
                 num_decode_tokens = 2
             else:
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 04ff0287..f6bd7f08 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -2044,13 +2044,13 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
             # We assume it is the decode stage, where prefill occurs but only one token is not hit in cache.
             elif np.all(num_scheduled_tokens == 1):
                 attn_state = AscendAttentionState.DecodeOnly
-                if self.speculative_config and self.speculative_config.method == 'deepseek_mtp':
+                if self.speculative_config and self.speculative_config.method == 'mtp':
                     # SpecDecoding now supports seq_len=1 and seq_len=2
                     # In Prefilling Decoding Disaggregation scenario, SpecDecoding need to supports seq_len=1
                     attn_state = AscendAttentionState.SpecDecoding
             # Speculative decoding.
             elif np.all(num_valid_tokens == 1):
-                if self.speculative_config and self.speculative_config.method == 'deepseek_mtp':
+                if self.speculative_config and self.speculative_config.method == 'mtp':
                     attn_state = AscendAttentionState.SpecDecoding
                 else:
                     attn_state = AscendAttentionState.ChunkedPrefill
@@ -2701,7 +2701,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
         with ProfileExecuteDuration().capture_async("Draft"):
             if self.speculative_config:
                 use_padded_batch_for_eagle = self.speculative_config and \
-                    self.speculative_config.method in ("deepseek_mtp", "qwen3_next_mtp") and \
+                    self.speculative_config.method == "mtp" and \
                     not self.speculative_config.disable_padded_drafter_batch
                 if use_padded_batch_for_eagle:
                     # EAGLE speculative decoding can use the GPU sampled tokens
@@ -2900,7 +2900,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
                 block_table_tensor[:num_reqs * self.decode_threshold]
             attn_state = AscendAttentionState.DecodeOnly
             if self.speculative_config and \
-                self.speculative_config.method == "deepseek_mtp":
+                self.speculative_config.method == "mtp":
                 attn_state = AscendAttentionState.SpecDecoding

             common_metadata = CommonAttentionMetadata(