[FEATURE][MTP] Support MTP > 1 (#2708)

### What this PR does / why we need it?
[RFC:Support MTP > 1 for
DeepSeek](https://github.com/vllm-project/vllm-ascend/issues/2745)

- [x] dp1 tp16
- [x] dp4 tp4
- [x] dp2 tp 8
- [x] torchair graph

- vLLM version: v0.10.1.1
- vLLM main:
c9f7081f9c

Signed-off-by: 1092626063 <1092626063@qq.com>
This commit is contained in:
1092626063
2025-09-05 09:11:22 +08:00
committed by GitHub
parent 83eb40a51c
commit 5b3646ab21
5 changed files with 206 additions and 88 deletions

View File

@@ -20,9 +20,10 @@ def model_name():
return "wemaster/deepseek_mtp_main_random_bf16"
def test_mtp_correctness(
def mtp_correctness(
sampling_config: SamplingParams,
model_name: str,
num_speculative_tokens: int,
):
example_prompts = [
"Hello, my name is",
@@ -50,7 +51,7 @@ def test_mtp_correctness(
enable_expert_parallel=True,
speculative_config={
"method": "deepseek_mtp",
"num_speculative_tokens": 1,
"num_speculative_tokens": num_speculative_tokens,
},
enforce_eager=True,
max_model_len=2000,
@@ -74,3 +75,18 @@ def test_mtp_correctness(
# Heuristic: expect at least 66% of the prompts to match exactly
# Upon failure, inspect the outputs to check for inaccuracy.
assert matches > int(0.66 * len(ref_outputs))
del spec_llm
def test_mtp1_correctness(
sampling_config: SamplingParams,
model_name: str,
):
mtp_correctness(sampling_config, model_name, 1)
def test_mtp2_correctness(
sampling_config: SamplingParams,
model_name: str,
):
mtp_correctness(sampling_config, model_name, 2)