add e2e test for mtp async_scheduling (#4826)
### What this PR does / why we need it?
add e2e test for mtp async scheduling
### Does this PR introduce _any_ user-facing change?
no
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
@@ -13,6 +13,7 @@ from tests.e2e.conftest import VllmRunner
|
||||
from tests.e2e.model_utils import check_outputs_equal
|
||||
|
||||
MODEL = "Qwen/Qwen3-0.6B"
|
||||
MTP_MODEL = "wemaster/deepseek_mtp_main_random_bf16"
|
||||
|
||||
first_prompt = ("The following numbers of the sequence " +
|
||||
", ".join(str(i) for i in range(10)) + " are:")
|
||||
@@ -44,6 +45,27 @@ def test_without_spec_decoding(monkeypatch: pytest.MonkeyPatch, ):
|
||||
run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)
|
||||
|
||||
|
||||
def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
|
||||
"""Test consistency and acceptance rates with some different combos of
|
||||
preemption, executor, async scheduling, prefill chunking,
|
||||
spec decoding model length.
|
||||
"""
|
||||
|
||||
spec_config = {
|
||||
"method": "mtp",
|
||||
"num_speculative_tokens": 2,
|
||||
}
|
||||
|
||||
# test_preemption, executor, async_scheduling,
|
||||
# spec_config, test_prefill_chunking
|
||||
test_configs = [
|
||||
(False, "mp", True, spec_config, False),
|
||||
(False, "mp", False, spec_config, False),
|
||||
]
|
||||
|
||||
run_tests(monkeypatch, MTP_MODEL, test_configs, [{}])
|
||||
|
||||
|
||||
@dynamo_config.patch(cache_size_limit=16)
|
||||
def run_tests(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
|
||||
Reference in New Issue
Block a user