[TEST]Update mm param --mm-processor-cache-gb (#5242)
### What this PR does / why we need it?
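This PR replaces the `--disable-mm-preprocessor-cache` server argument with
`--mm-processor-cache-gb 0` in the test server arguments. The change is needed for the test case to run.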
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By running the affected tests.
- vLLM version: release/v0.13.0
- vLLM main: ad32e3e19c
Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
This commit is contained in:
@@ -80,7 +80,7 @@ async def test_models(model: str, tp_size: int) -> None:
|
|||||||
"HCCL_OP_EXPANSION_MODE": "AIV"
|
"HCCL_OP_EXPANSION_MODE": "AIV"
|
||||||
}
|
}
|
||||||
server_args = [
|
server_args = [
|
||||||
"--no-enable-prefix-caching", "--disable-mm-preprocessor-cache",
|
"--no-enable-prefix-caching", "--mm-processor-cache-gb", "0",
|
||||||
"--tensor-parallel-size",
|
"--tensor-parallel-size",
|
||||||
str(tp_size), "--port",
|
str(tp_size), "--port",
|
||||||
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ async def test_models(model: str, tp_size: int) -> None:
|
|||||||
"HCCL_OP_EXPANSION_MODE": "AIV"
|
"HCCL_OP_EXPANSION_MODE": "AIV"
|
||||||
}
|
}
|
||||||
server_args = [
|
server_args = [
|
||||||
"--no-enable-prefix-caching", "--disable-mm-preprocessor-cache",
|
"--no-enable-prefix-caching", "--mm-processor-cache-gb", "0",
|
||||||
"--tensor-parallel-size",
|
"--tensor-parallel-size",
|
||||||
str(tp_size), "--port",
|
str(tp_size), "--port",
|
||||||
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
|
||||||
|
|||||||
Reference in New Issue
Block a user