From 1f486b2dd12cac1b357f3c82211293eb6910914e Mon Sep 17 00:00:00 2001 From: lilinsiman Date: Fri, 31 Oct 2025 11:23:13 +0800 Subject: [PATCH] [Test] Add new test model for aclgraph single_request (#3888) ### What this PR does / why we need it? Add the quantized `vllm-ascend/DeepSeek-V2-Lite-W8A8` model to the aclgraph single_request e2e test; for this model the server is launched with `--quantization ascend` and `--max-model-len 1024`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ut - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac Signed-off-by: lilinsiman --- .../multicard/test_single_request_aclgraph.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/e2e/multicard/test_single_request_aclgraph.py b/tests/e2e/multicard/test_single_request_aclgraph.py index f7ef5d3e..5172f72a 100644 --- a/tests/e2e/multicard/test_single_request_aclgraph.py +++ b/tests/e2e/multicard/test_single_request_aclgraph.py @@ -28,9 +28,7 @@ if vllm_version_is("0.11.0"): else: from vllm.utils.network_utils import get_open_port -MODELS = [ - "Qwen/Qwen3-30B-A3B", -] +MODELS = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"] DATA_PARALLELS = [2] @@ -52,12 +50,21 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None: "TASK_QUEUE_ENABLE": "1", "HCCL_OP_EXPANSION_MODE": "AIV", } - server_args = [ - "--no-enable-prefix-caching", "--tensor-parallel-size", "1", - "--data-parallel-size", - str(dp_size), "--port", - str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9" - ] + if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8": + server_args = [ + "--no-enable-prefix-caching", "--tensor-parallel-size", "1", + "--data-parallel-size", + str(dp_size), "--quantization", "ascend", "--max-model-len", + "1024", "--port", + str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9" + ] + else: + server_args = [ + "--no-enable-prefix-caching", "--tensor-parallel-size", "1", + "--data-parallel-size", + str(dp_size), "--port", + str(port), 
"--trust-remote-code", "--gpu-memory-utilization", "0.9" + ] request_keyword_args: dict[str, Any] = { **api_keyword_args, }