diff --git a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py index 77c1a7e1..4ecd403f 100644 --- a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py +++ b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py @@ -80,7 +80,7 @@ async def test_models(model: str, tp_size: int) -> None: "HCCL_OP_EXPANSION_MODE": "AIV" } server_args = [ - "--no-enable-prefix-caching", "--disable-mm-preprocessor-cache", + "--no-enable-prefix-caching", "--mm-processor-cache-gb", "0", "--tensor-parallel-size", str(tp_size), "--port", str(port), "--max-model-len", "30000", "--max-num-batched-tokens", diff --git a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py index d3a726bf..33cab210 100644 --- a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +++ b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py @@ -72,7 +72,7 @@ async def test_models(model: str, tp_size: int) -> None: "HCCL_OP_EXPANSION_MODE": "AIV" } server_args = [ - "--no-enable-prefix-caching", "--disable-mm-preprocessor-cache", + "--no-enable-prefix-caching", "--mm-processor-cache-gb", "0", "--tensor-parallel-size", str(tp_size), "--port", str(port), "--max-model-len", "30000", "--max-num-batched-tokens",