[TEST]Add full graph for multimodal nightly tests (#3968)
### What this PR does / why we need it?
This PR adds full-graph mode to the multimodal nightly test; we need to maintain
this scenario.
### How was this patch tested?
by running the test
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
This commit is contained in:
@@ -86,7 +86,8 @@ async def test_models(model: str, tp_size: int) -> None:
|
||||
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
||||
"40000", "--max-num-seqs", "400", "--trust-remote-code",
|
||||
"--gpu-memory-utilization", "0.8", "--additional-config",
|
||||
'{"ascend_scheduler_config":{"enabled":false}}'
|
||||
'{"ascend_scheduler_config":{"enabled":false}}',
|
||||
"--compilation_config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}'
|
||||
]
|
||||
request_keyword_args: dict[str, Any] = {
|
||||
**api_keyword_args,
|
||||
|
||||
@@ -72,21 +72,13 @@ async def test_models(model: str, tp_size: int) -> None:
|
||||
"HCCL_OP_EXPANSION_MODE": "AIV"
|
||||
}
|
||||
server_args = [
|
||||
"--no-enable-prefix-caching",
|
||||
"--disable-mm-preprocessor-cache",
|
||||
"--no-enable-prefix-caching", "--disable-mm-preprocessor-cache",
|
||||
"--tensor-parallel-size",
|
||||
str(tp_size),
|
||||
"--port",
|
||||
str(port),
|
||||
"--max-model-len",
|
||||
"30000",
|
||||
"--max-num-batched-tokens",
|
||||
"40000",
|
||||
"--max-num-seqs",
|
||||
"400",
|
||||
"--trust-remote-code",
|
||||
"--gpu-memory-utilization",
|
||||
"0.8",
|
||||
str(tp_size), "--port",
|
||||
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
||||
"40000", "--max-num-seqs", "400", "--trust-remote-code",
|
||||
"--gpu-memory-utilization", "0.8", "--compilation_config",
|
||||
'{"cudagraph_mode": "FULL_DECODE_ONLY"}'
|
||||
]
|
||||
request_keyword_args: dict[str, Any] = {
|
||||
**api_keyword_args,
|
||||
|
||||
Reference in New Issue
Block a user