[FEATURE][MTP] Support MTP > 1 (#2708)

### What this PR does / why we need it? [RFC: Support MTP > 1 for DeepSeek](https://github.com/vllm-project/vllm-ascend/issues/2745) - [x] dp1 tp16 - [x] dp4 tp4 - [x] dp2 tp8 - [x] torchair graph - vLLM version: v0.10.1.1 - vLLM main: c9f7081f9c Signed-off-by: 1092626063 <1092626063@qq.com>
2025-09-05 09:11:22 +08:00
parent 83eb40a51c
commit 5b3646ab21
5 changed files with 206 additions and 88 deletions
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
@@ -20,9 +20,10 @@ def model_name():
    return "wemaster/deepseek_mtp_main_random_bf16"


-def test_mtp_correctness(
+def mtp_correctness(
    sampling_config: SamplingParams,
    model_name: str,
+    num_speculative_tokens: int,
 ):
    example_prompts = [
        "Hello, my name is",
@@ -50,7 +51,7 @@ def test_mtp_correctness(
            enable_expert_parallel=True,
            speculative_config={
                "method": "deepseek_mtp",
-                "num_speculative_tokens": 1,
+                "num_speculative_tokens": num_speculative_tokens,
            },
            enforce_eager=True,
            max_model_len=2000,
@@ -74,3 +75,18 @@ def test_mtp_correctness(
    # Heuristic: expect at least 66% of the prompts to match exactly
    # Upon failure, inspect the outputs to check for inaccuracy.
    assert matches > int(0.66 * len(ref_outputs))
+    del spec_llm
+
+
+def test_mtp1_correctness(
+    sampling_config: SamplingParams,
+    model_name: str,
+):
+    mtp_correctness(sampling_config, model_name, 1)
+
+
+def test_mtp2_correctness(
+    sampling_config: SamplingParams,
+    model_name: str,
+):
+    mtp_correctness(sampling_config, model_name, 2)