From 6cb76ecd0280cb363548058eca6044606be35d37 Mon Sep 17 00:00:00 2001 From: ZT-AIA <63220130+ZT-AIA@users.noreply.github.com> Date: Thu, 18 Dec 2025 22:25:45 +0800 Subject: [PATCH] [Nightly] Avoid max_model_len being smaller than the decoder prompt to prevent single-node-accuracy-tests from failing (#5174) ### What this PR does / why we need it? [Nightly] Avoid max_model_len being smaller than the decoder prompt to prevent single-node-accuracy-tests from failing ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: ZT-AIA <1028681969@qq.com> Signed-off-by: ZT-AIA <63220130+ZT-AIA@users.noreply.github.com> --- tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml | 1 + tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml b/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml index 9c98249c..5b5dc050 100644 --- a/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +++ b/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml @@ -6,6 +6,7 @@ tasks: metrics: - name: "acc,none" value: 0.58 +max_model_len: 8192 tensor_parallel_size: 2 gpu_memory_utilization: 0.7 enable_expert_parallel: True diff --git a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml index 96581e54..8803a120 100644 --- a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +++ b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml @@ -6,5 +6,6 @@ tasks: metrics: - name: "acc,none" value: 0.55 +max_model_len: 8192 batch_size: 32 gpu_memory_utilization: 0.7