From 6cb76ecd0280cb363548058eca6044606be35d37 Mon Sep 17 00:00:00 2001 From: ZT-AIA <63220130+ZT-AIA@users.noreply.github.com> Date: Thu, 18 Dec 2025 22:25:45 +0800 Subject: [PATCH] [Nightly] Avoid max_model_len being smaller than the decoder prompt to prevent single-node-accuracy-tests from failing (#5174) ### What this PR does / why we need it? [Nightly] Avoid max_model_len being smaller than the decoder prompt to prevent single-node-accuracy-tests from failing ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: ZT-AIA <1028681969@qq.com> Signed-off-by: ZT-AIA <63220130+ZT-AIA@users.noreply.github.com> --- tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml | 1 + tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml b/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml index 9c98249c..5b5dc050 100644 --- a/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +++ b/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml @@ -6,6 +6,7 @@ tasks: metrics: - name: "acc,none" value: 0.58 +max_model_len: 8192 tensor_parallel_size: 2 gpu_memory_utilization: 0.7 enable_expert_parallel: True diff --git a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml index 96581e54..8803a120 100644 --- a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +++ b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml @@ -6,5 +6,6 @@ tasks: metrics: - name: "acc,none" value: 0.55 +max_model_len: 8192 batch_size: 32 gpu_memory_utilization: 0.7