From ee2e55e602fb244d4c490f3b0f6ad56b2fbe9d77 Mon Sep 17 00:00:00 2001 From: lilinsiman Date: Fri, 31 Oct 2025 11:23:55 +0800 Subject: [PATCH] [v0.11.0][Test] Add new test model for aclgraph single_request v0.11.0 (#3889) ### What this PR does / why we need it? Add a new test model, vllm-ascend/DeepSeek-V2-Lite-W8A8, to the aclgraph single_request e2e test on v0.11.0; the server for this model is started with the extra arguments --quantization ascend and --max-model-len 1024. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ut Signed-off-by: lilinsiman --- .../multicard/test_single_request_aclgraph.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/e2e/multicard/test_single_request_aclgraph.py b/tests/e2e/multicard/test_single_request_aclgraph.py index 1a0e6f9..8af62f9 100644 --- a/tests/e2e/multicard/test_single_request_aclgraph.py +++ b/tests/e2e/multicard/test_single_request_aclgraph.py @@ -23,9 +23,7 @@ from vllm.utils import get_open_port from tests.e2e.conftest import RemoteOpenAIServer -MODELS = [ - "Qwen/Qwen3-30B-A3B", -] +MODELS = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"] DATA_PARALLELS = [2] @@ -47,12 +45,21 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None: "TASK_QUEUE_ENABLE": "1", "HCCL_OP_EXPANSION_MODE": "AIV", } - server_args = [ - "--no-enable-prefix-caching", "--tensor-parallel-size", "1", - "--data-parallel-size", - str(dp_size), "--port", - str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9" - ] + if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8": + server_args = [ + "--no-enable-prefix-caching", "--tensor-parallel-size", "1", + "--data-parallel-size", + str(dp_size), "--quantization", "ascend", "--max-model-len", + "1024", "--port", + str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9" + ] + else: + server_args = [ + "--no-enable-prefix-caching", "--tensor-parallel-size", "1", + "--data-parallel-size", + str(dp_size), "--port", + str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9" + ] request_keyword_args: dict[str, Any] = {
**api_keyword_args, }