add e2e test for mtp async_scheduling (#4826)

### What this PR does / why we need it?
Adds an e2e test for MTP (multi-token prediction) speculative decoding with async scheduling.

### Does this PR introduce _any_ user-facing change?
No.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
2025-12-10 11:30:22 +08:00
parent 134e011896
commit 1a7a34c5ec
1 changed files with 22 additions and 0 deletions
--- a/tests/e2e/singlecard/test_async_scheduling.py
+++ b/tests/e2e/singlecard/test_async_scheduling.py
@@ -13,6 +13,7 @@ from tests.e2e.conftest import VllmRunner
 from tests.e2e.model_utils import check_outputs_equal

 MODEL = "Qwen/Qwen3-0.6B"
+MTP_MODEL = "wemaster/deepseek_mtp_main_random_bf16"

 first_prompt = ("The following numbers of the sequence " +
                ", ".join(str(i) for i in range(10)) + " are:")
@@ -44,6 +45,27 @@ def test_without_spec_decoding(monkeypatch: pytest.MonkeyPatch, ):
    run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)


+def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
+    """Run MTP_MODEL with "mtp" speculative decoding (2 speculative tokens),
+    once with async scheduling enabled and once disabled, via run_tests.
+    Both configs use the "mp" executor, no preemption, no prefill chunking.
+    """
+
+    spec_config = {
+        "method": "mtp",
+        "num_speculative_tokens": 2,
+    }
+
+    # Tuple field order: test_preemption, executor, async_scheduling,
+    # spec_config, test_prefill_chunking (the two rows differ only in field 3)
+    test_configs = [
+        (False, "mp", True, spec_config, False),  # async scheduling on
+        (False, "mp", False, spec_config, False),  # async scheduling off
+    ]
+
+    run_tests(monkeypatch, MTP_MODEL, test_configs, [{}])
+
+
@dynamo_config.patch(cache_size_limit=16)
 def run_tests(
    monkeypatch: pytest.MonkeyPatch,