From 9d84172359d6f1a23cc6be7c0cc619f943529f07 Mon Sep 17 00:00:00 2001 From: drslark <96540755+drslark@users.noreply.github.com> Date: Thu, 13 Nov 2025 11:08:35 +0800 Subject: [PATCH] [BugFix] adapted e2e tests for Qwen3-next-mtp (#4160) ### What this PR does / why we need it? Since https://github.com/vllm-project/vllm-ascend/pull/3967, chunked prefill and splitfuse are enabled by default, which breaks the e2e test for mtp. After locating the bug, we found that a triton operator does not support chunked prefill. Simply skipping the e2e test is undesirable, so we changed the e2e test to only cover the case in which chunked prefill is off. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Because we only modified `test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY`, we only ran `pytest -s tests/e2e/multicard/test_qwen3_next.py::test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY` locally to test it. Below is the result: ```text ==================================================================================================================== warnings summary ==================================================================================================================== usr/local/python3.11.10/lib/python3.11/site-packages/torch_npu/dynamo/torchair/__init__.py:8 /usr/local/python3.11.10/lib/python3.11/site-packages/torch_npu/dynamo/torchair/__init__.py:8: DeprecationWarning: pkg_resources is deprecated as an API. 
See https://setuptools.pypa.io/en/latest/pkg_resources.html import pkg_resources :241 :241: DeprecationWarning: builtin type SwigPyPacked has no __module__ attribute :241 :241: DeprecationWarning: builtin type SwigPyObject has no __module__ attribute tests/e2e/multicard/test_qwen3_next.py::test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY tests/e2e/multicard/test_qwen3_next.py::test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY /usr/local/python3.11.10/lib/python3.11/site-packages/pydantic/_internal/_dataclasses.py:121: DeprecationWarning: The 'task' option has been deprecated and will be removed in v0.13.0 or v1.0, whichever comes first. Please remove this option. s.__pydantic_validator__.validate_python(ArgsKwargs(args, kwargs), self_instance=s) -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ======================================================================================================= 1 passed, 5 warnings in 314.52s (0:05:14) ======================================================================================================== sys:1: DeprecationWarning: builtin type swigvarlink has no __module__ attribute ``` - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 Signed-off-by: drslark --- tests/e2e/multicard/test_qwen3_next.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/e2e/multicard/test_qwen3_next.py b/tests/e2e/multicard/test_qwen3_next.py index ee213a8f..c17eed95 100644 --- a/tests/e2e/multicard/test_qwen3_next.py +++ b/tests/e2e/multicard/test_qwen3_next.py @@ -20,8 +20,6 @@ Run `pytest tests/e2e/multicard/test_qwen3_next.py`. 
""" -import pytest - from tests.e2e.conftest import VllmRunner @@ -59,7 +57,6 @@ def test_models_distributed_Qwen3_NEXT_TP4_FULL_DECODE_ONLY(): del vllm_model -@pytest.mark.skip(reason="TODO: fix the test case later.") def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY(): example_prompts = [ "Hello, my name is", @@ -82,6 +79,12 @@ def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY(): max_model_len=4096, gpu_memory_utilization=0.8, distributed_executor_backend="mp", + additional_config={ + "ascend_scheduler_config": { + "enabled": True, + "enable_chunked_prefill": False + } + }, speculative_config={ "method": "qwen3_next_mtp", "num_speculative_tokens": 1