From 1a7a34c5ec59f9f5de063a43ee35469356b7ac2e Mon Sep 17 00:00:00 2001
From: Ronald <ronaldautomobile@163.com>
Date: Wed, 10 Dec 2025 11:30:22 +0800
Subject: [PATCH] add e2e test for mtp async_scheduling (#4826)

### What this PR does / why we need it?
Add an e2e test that runs MTP speculative decoding with async scheduling enabled and disabled.
### Does this PR introduce _any_ user-facing change?
no

- vLLM version: v0.12.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

---------

Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
---
 tests/e2e/singlecard/test_async_scheduling.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/e2e/singlecard/test_async_scheduling.py b/tests/e2e/singlecard/test_async_scheduling.py
index 3bfbd0c9..4f4eb05f 100644
--- a/tests/e2e/singlecard/test_async_scheduling.py
+++ b/tests/e2e/singlecard/test_async_scheduling.py
@@ -13,6 +13,7 @@ from tests.e2e.conftest import VllmRunner
 from tests.e2e.model_utils import check_outputs_equal
 
 MODEL = "Qwen/Qwen3-0.6B"
+MTP_MODEL = "wemaster/deepseek_mtp_main_random_bf16"
 
 first_prompt = ("The following numbers of the sequence " +
                 ", ".join(str(i) for i in range(10)) + " are:")
@@ -44,6 +45,27 @@ def test_without_spec_decoding(monkeypatch: pytest.MonkeyPatch, ):
     run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)
 
 
+def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
+    """Pass two MTP speculative-decoding configs to run_tests that differ
+    only in the async_scheduling flag; both use the "mp" executor with
+    preemption and prefill chunking off (comparison presumably in run_tests).
+    """
+
+    spec_config = {
+        "method": "mtp",
+        "num_speculative_tokens": 2,
+    }
+
+    # Tuple layout: (test_preemption, executor, async_scheduling,
+    # spec_config, test_prefill_chunking); only async_scheduling varies.
+    test_configs = [
+        (False, "mp", True, spec_config, False),
+        (False, "mp", False, spec_config, False),
+    ]
+
+    run_tests(monkeypatch, MTP_MODEL, test_configs, [{}])
+
+
 @dynamo_config.patch(cache_size_limit=16)
 def run_tests(
     monkeypatch: pytest.MonkeyPatch,