[TEST]Add full graph for multimodal nightly tests (#3968)
### What this PR does / why we need it?
This PR adds full-graph mode (`cudagraph_mode: FULL_DECODE_ONLY`) to the
multimodal nightly tests; we need to maintain this scenario.
### How was this patch tested?
By running the test.
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
This commit is contained in:
@@ -86,7 +86,8 @@ async def test_models(model: str, tp_size: int) -> None:
|
|||||||
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
||||||
"40000", "--max-num-seqs", "400", "--trust-remote-code",
|
"40000", "--max-num-seqs", "400", "--trust-remote-code",
|
||||||
"--gpu-memory-utilization", "0.8", "--additional-config",
|
"--gpu-memory-utilization", "0.8", "--additional-config",
|
||||||
'{"ascend_scheduler_config":{"enabled":false}}'
|
'{"ascend_scheduler_config":{"enabled":false}}',
|
||||||
|
"--compilation_config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}'
|
||||||
]
|
]
|
||||||
request_keyword_args: dict[str, Any] = {
|
request_keyword_args: dict[str, Any] = {
|
||||||
**api_keyword_args,
|
**api_keyword_args,
|
||||||
|
|||||||
@@ -72,21 +72,13 @@ async def test_models(model: str, tp_size: int) -> None:
|
|||||||
"HCCL_OP_EXPANSION_MODE": "AIV"
|
"HCCL_OP_EXPANSION_MODE": "AIV"
|
||||||
}
|
}
|
||||||
server_args = [
|
server_args = [
|
||||||
"--no-enable-prefix-caching",
|
"--no-enable-prefix-caching", "--disable-mm-preprocessor-cache",
|
||||||
"--disable-mm-preprocessor-cache",
|
|
||||||
"--tensor-parallel-size",
|
"--tensor-parallel-size",
|
||||||
str(tp_size),
|
str(tp_size), "--port",
|
||||||
"--port",
|
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
||||||
str(port),
|
"40000", "--max-num-seqs", "400", "--trust-remote-code",
|
||||||
"--max-model-len",
|
"--gpu-memory-utilization", "0.8", "--compilation_config",
|
||||||
"30000",
|
'{"cudagraph_mode": "FULL_DECODE_ONLY"}'
|
||||||
"--max-num-batched-tokens",
|
|
||||||
"40000",
|
|
||||||
"--max-num-seqs",
|
|
||||||
"400",
|
|
||||||
"--trust-remote-code",
|
|
||||||
"--gpu-memory-utilization",
|
|
||||||
"0.8",
|
|
||||||
]
|
]
|
||||||
request_keyword_args: dict[str, Any] = {
|
request_keyword_args: dict[str, Any] = {
|
||||||
**api_keyword_args,
|
**api_keyword_args,
|
||||||
|
|||||||
Reference in New Issue
Block a user