[Test] Add new test model for aclgraph single_request (#3888)
### What this PR does / why we need it?
Add a new test model, `vllm-ascend/DeepSeek-V2-Lite-W8A8`, to the aclgraph single_request test.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit tests (UT).
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
Signed-off-by: lilinsiman <lilinsiman@gmail.com>
This commit is contained in:
@@ -28,9 +28,7 @@ if vllm_version_is("0.11.0"):
|
|||||||
else:
|
else:
|
||||||
from vllm.utils.network_utils import get_open_port
|
from vllm.utils.network_utils import get_open_port
|
||||||
|
|
||||||
MODELS = [
|
MODELS = ["Qwen/Qwen3-30B-A3B", "vllm-ascend/DeepSeek-V2-Lite-W8A8"]
|
||||||
"Qwen/Qwen3-30B-A3B",
|
|
||||||
]
|
|
||||||
|
|
||||||
DATA_PARALLELS = [2]
|
DATA_PARALLELS = [2]
|
||||||
|
|
||||||
@@ -52,12 +50,21 @@ async def test_single_request_aclgraph(model: str, dp_size: int) -> None:
|
|||||||
"TASK_QUEUE_ENABLE": "1",
|
"TASK_QUEUE_ENABLE": "1",
|
||||||
"HCCL_OP_EXPANSION_MODE": "AIV",
|
"HCCL_OP_EXPANSION_MODE": "AIV",
|
||||||
}
|
}
|
||||||
server_args = [
|
if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
|
||||||
"--no-enable-prefix-caching", "--tensor-parallel-size", "1",
|
server_args = [
|
||||||
"--data-parallel-size",
|
"--no-enable-prefix-caching", "--tensor-parallel-size", "1",
|
||||||
str(dp_size), "--port",
|
"--data-parallel-size",
|
||||||
str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
|
str(dp_size), "--quantization", "ascend", "--max-model-len",
|
||||||
]
|
"1024", "--port",
|
||||||
|
str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
server_args = [
|
||||||
|
"--no-enable-prefix-caching", "--tensor-parallel-size", "1",
|
||||||
|
"--data-parallel-size",
|
||||||
|
str(dp_size), "--port",
|
||||||
|
str(port), "--trust-remote-code", "--gpu-memory-utilization", "0.9"
|
||||||
|
]
|
||||||
request_keyword_args: dict[str, Any] = {
|
request_keyword_args: dict[str, Any] = {
|
||||||
**api_keyword_args,
|
**api_keyword_args,
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user