add e2e test for mtp async_scheduling (#4826)

### What this PR does / why we need it?
Add an end-to-end (e2e) test that runs MTP speculative decoding with async scheduling both enabled and disabled.
### Does this PR introduce _any_ user-facing change?
No.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
Ronald
2025-12-10 11:30:22 +08:00
committed by GitHub
parent 134e011896
commit 1a7a34c5ec

View File

@@ -13,6 +13,7 @@ from tests.e2e.conftest import VllmRunner
from tests.e2e.model_utils import check_outputs_equal
MODEL = "Qwen/Qwen3-0.6B"
MTP_MODEL = "wemaster/deepseek_mtp_main_random_bf16"
first_prompt = ("The following numbers of the sequence " +
", ".join(str(i) for i in range(10)) + " are:")
@@ -44,6 +45,27 @@ def test_without_spec_decoding(monkeypatch: pytest.MonkeyPatch, ):
run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)
def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
    """Exercise MTP speculative decoding with async scheduling on and off.

    Builds two configurations that are identical except for the
    async-scheduling flag and hands them to ``run_tests`` together with
    ``MTP_MODEL`` and a single empty sampling-parameter dict.
    """
    # Shared by both configurations: the "mtp" method with two
    # speculative tokens.
    mtp_spec = {
        "method": "mtp",
        "num_speculative_tokens": 2,
    }
    # Tuple layout passed to run_tests:
    #   (test_preemption, executor, async_scheduling,
    #    spec_config, test_prefill_chunking)
    # Async scheduling is tried first, then the same setup with it disabled.
    configs = [
        (False, "mp", use_async, mtp_spec, False)
        for use_async in (True, False)
    ]
    run_tests(monkeypatch, MTP_MODEL, configs, [{}])
@dynamo_config.patch(cache_size_limit=16)
def run_tests(
monkeypatch: pytest.MonkeyPatch,