From 1f486b2dd12cac1b357f3c82211293eb6910914e Mon Sep 17 00:00:00 2001
From: lilinsiman <lilinsiman@gmail.com>
Date: Fri, 31 Oct 2025 11:23:13 +0800
Subject: [PATCH] [Test] Add new test model for aclgraph single_request (#3888)

### What this PR does / why we need it?
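Add a new test model, `vllm-ascend/DeepSeek-V2-Lite-W8A8`, to the aclgraph single-request e2e test. The new model is launched with the additional server arguments `--quantization ascend` and `--max-model-len 1024`.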

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
ut

- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
---
 .../multicard/test_single_request_aclgraph.py | 25 ++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tests/e2e/multicard/test_single_request_aclgraph.py b/tests/e2e/multicard/test_single_request_aclgraph.py
index f7ef5d3e..5172f72a 100644
--- a/tests/e2e/multicard/test_single_request_aclgraph.py
+++ b/tests/e2e/multicard/test_single_request_aclgraph.py
@@ -28,9 +28,7 @@ if vllm_version_is("0.11.0"):
 else:
     from vllm.utils.network_utils import get_open_port
 
-MODELS = [
-    "Qwen/Qwen3-30B-A3B",
-]
+MODELS = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"]
 
 DATA_PARALLELS = [2]
 
@@ -52,12 +50,21 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None:
         "TASK_QUEUE_ENABLE": "1",
         "HCCL_OP_EXPANSION_MODE": "AIV",
     }
-    server_args = [
-        "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
-        "--data-parallel-size",
-        str(dp_size), "--port",
-        str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
-    ]
+    if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
+        server_args = [
+            "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
+            "--data-parallel-size",
+            str(dp_size), "--quantization", "ascend", "--max-model-len",
+            "1024", "--port",
+            str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
+        ]
+    else:
+        server_args = [
+            "--no-enable-prefix-caching", "--tensor-parallel-size", "1",
+            "--data-parallel-size",
+            str(dp_size), "--port",
+            str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
+        ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
     }