add e2e test for mtp async_scheduling (#4826)
### What this PR does / why we need it?
Add an end-to-end (e2e) test for MTP speculative decoding with async scheduling.
### Does this PR introduce _any_ user-facing change?
no
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
@@ -13,6 +13,7 @@ from tests.e2e.conftest import VllmRunner
|
|||||||
from tests.e2e.model_utils import check_outputs_equal
|
from tests.e2e.model_utils import check_outputs_equal
|
||||||
|
|
||||||
MODEL = "Qwen/Qwen3-0.6B"
|
MODEL = "Qwen/Qwen3-0.6B"
|
||||||
|
MTP_MODEL = "wemaster/deepseek_mtp_main_random_bf16"
|
||||||
|
|
||||||
first_prompt = ("The following numbers of the sequence " +
|
first_prompt = ("The following numbers of the sequence " +
|
||||||
", ".join(str(i) for i in range(10)) + " are:")
|
", ".join(str(i) for i in range(10)) + " are:")
|
||||||
@@ -44,6 +45,27 @@ def test_without_spec_decoding(monkeypatch: pytest.MonkeyPatch, ):
|
|||||||
run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)
|
run_tests(monkeypatch, MODEL, test_configs, test_sampling_params)
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
    """Run the MTP speculative-decoding model with and without async scheduling.

    Both configurations use the "mp" executor and leave the preemption and
    prefill-chunking test flags off, so the async scheduling flag is the only
    setting that differs between them. The configurations are handed to
    ``run_tests`` together with a single empty sampling-params dict.
    """
    mtp_spec_config = {
        "method": "mtp",
        "num_speculative_tokens": 2,
    }

    # Field order of each config tuple:
    # (test_preemption, executor, async_scheduling,
    #  spec_config, test_prefill_chunking)
    configs = [(False, "mp", use_async, mtp_spec_config, False)
               for use_async in (True, False)]

    default_sampling_params = [{}]
    run_tests(monkeypatch, MTP_MODEL, configs, default_sampling_params)
|
||||||
|
|
||||||
|
|
||||||
@dynamo_config.patch(cache_size_limit=16)
|
@dynamo_config.patch(cache_size_limit=16)
|
||||||
def run_tests(
|
def run_tests(
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
|||||||
Reference in New Issue
Block a user