[TEST]Add full graph for multimodal nightly tests (#3968)

### What this PR does / why we need it?
This PR adds full graph mode to the multimodal nightly tests; we need to keep this scenario covered.

### How was this patch tested?
By running the test.

- vLLM version: v0.11.0
- vLLM main: 83f478bb19

Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
2025-11-04 16:47:48 +08:00
parent 15bb5098ad
commit 44b58b8665
2 changed files with 8 additions and 15 deletions
--- a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py
+++ b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py
@@ -86,7 +86,8 @@ async def test_models(model: str, tp_size: int) -> None:
        str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
        "40000", "--max-num-seqs", "400", "--trust-remote-code",
        "--gpu-memory-utilization", "0.8", "--additional-config",
-        '{"ascend_scheduler_config":{"enabled":false}}'
+        '{"ascend_scheduler_config":{"enabled":false}}',
+        "--compilation_config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}'
    ]
    request_keyword_args: dict[str, Any] = {
        **api_keyword_args,
--- a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
+++ b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
@@ -72,21 +72,13 @@ async def test_models(model: str, tp_size: int) -> None:
        "HCCL_OP_EXPANSION_MODE": "AIV"
    }
    server_args = [
-        "--no-enable-prefix-caching",
-        "--disable-mm-preprocessor-cache",
+        "--no-enable-prefix-caching", "--disable-mm-preprocessor-cache",
        "--tensor-parallel-size",
-        str(tp_size),
-        "--port",
-        str(port),
-        "--max-model-len",
-        "30000",
-        "--max-num-batched-tokens",
-        "40000",
-        "--max-num-seqs",
-        "400",
-        "--trust-remote-code",
-        "--gpu-memory-utilization",
-        "0.8",
+        str(tp_size), "--port",
+        str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
+        "40000", "--max-num-seqs", "400", "--trust-remote-code",
+        "--gpu-memory-utilization", "0.8", "--compilation_config",
+        '{"cudagraph_mode": "FULL_DECODE_ONLY"}'
    ]
    request_keyword_args: dict[str, Any] = {
        **api_keyword_args,