From 44b58b86659a55a690ffa9c0362f02ba3645d3da Mon Sep 17 00:00:00 2001
From: jiangyunfan1
Date: Tue, 4 Nov 2025 16:47:48 +0800
Subject: [PATCH] [TEST] Add full graph for multimodal nightly tests (#3968)

### What this PR does / why we need it?
This PR enables full graph mode for the multimodal nightly tests by passing
`--compilation_config '{"cudagraph_mode": "FULL_DECODE_ONLY"}'` to the test
servers; we need to keep this scenario covered.

### How was this patch tested?
By running the modified nightly tests.

- vLLM version: v0.11.0
- vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac

Signed-off-by: jiangyunfan1
---
 .../e2e/nightly/models/test_qwen2_5_vl_32b.py |  3 ++-
 .../e2e/nightly/models/test_qwen2_5_vl_7b.py  | 20 ++++++-------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py
index 09c0d4ee..760f8dee 100644
--- a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py
+++ b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py
@@ -86,7 +86,8 @@ async def test_models(model: str, tp_size: int) -> None:
         str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
         "40000", "--max-num-seqs", "400", "--trust-remote-code",
         "--gpu-memory-utilization", "0.8", "--additional-config",
-        '{"ascend_scheduler_config":{"enabled":false}}'
+        '{"ascend_scheduler_config":{"enabled":false}}',
+        "--compilation_config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}'
     ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
diff --git a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
index 578ae1db..bc35ff88 100644
--- a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
+++ b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py
@@ -72,21 +72,13 @@ async def test_models(model: str, tp_size: int) -> None:
         "HCCL_OP_EXPANSION_MODE": "AIV"
     }
     server_args = [
-        "--no-enable-prefix-caching",
-        "--disable-mm-preprocessor-cache",
+        "--no-enable-prefix-caching", "--disable-mm-preprocessor-cache",
         "--tensor-parallel-size",
-        str(tp_size),
-        "--port",
-        str(port),
-        "--max-model-len",
-        "30000",
-        "--max-num-batched-tokens",
-        "40000",
-        "--max-num-seqs",
-        "400",
-        "--trust-remote-code",
-        "--gpu-memory-utilization",
-        "0.8",
+        str(tp_size), "--port",
+        str(port), "--max-model-len", "30000", "--max-num-batched-tokens",
+        "40000", "--max-num-seqs", "400", "--trust-remote-code",
+        "--gpu-memory-utilization", "0.8", "--compilation_config",
+        '{"cudagraph_mode": "FULL_DECODE_ONLY"}'
     ]
     request_keyword_args: dict[str, Any] = {
         **api_keyword_args,
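
For reference, the sketch below shows one way to launch a standalone server with the same full-graph setting outside the CI harness. It is a minimal sketch, not part of this patch: the model name and tensor-parallel size are illustrative assumptions (the nightly tests parametrize both), and only the `--compilation_config` value is taken from the diff above.

```python
# Minimal sketch (not part of this patch): spawn a vLLM OpenAI-compatible
# server with the full-graph decode setting these tests exercise.
import subprocess

server_cmd = [
    "vllm", "serve",
    "Qwen/Qwen2.5-VL-7B-Instruct",  # illustrative; the tests parametrize `model`
    "--tensor-parallel-size", "2",  # illustrative; the tests pass `tp_size`
    "--max-model-len", "30000",
    "--trust-remote-code",
    "--compilation_config", '{"cudagraph_mode": "FULL_DECODE_ONLY"}',
]
subprocess.run(server_cmd, check=True)  # blocks for as long as the server runs
```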