From 34b278a33951a2253740c2248cf2a30bb59146e0 Mon Sep 17 00:00:00 2001 From: jiangyunfan1 Date: Thu, 6 Nov 2025 16:58:38 +0800 Subject: [PATCH] [TEST]Update nightly acc test standard (#4032) ### What this PR does / why we need it? This PR updates the accuracy (acc) test standard, i.e. the baseline and threshold values, for some nightly cases. We need it to better maintain accuracy. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the tests. - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac Signed-off-by: jiangyunfan1 --- tests/e2e/nightly/models/test_qwen2_5_vl_32b.py | 2 +- tests/e2e/nightly/models/test_qwen2_5_vl_7b.py | 2 +- tests/e2e/nightly/models/test_qwen3_32b_int8.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py index 760f8dee..fe6bbedf 100644 --- a/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py +++ b/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py @@ -45,7 +45,7 @@ aisbench_cases = [{ "dataset_conf": "textvqa/textvqa_gen_base64", "max_out_len": 2048, "batch_size": 128, - "baseline": 76, + "baseline": 76.22, "temperature": 0, "top_k": -1, "top_p": 1, diff --git a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py index bc35ff88..d3a726bf 100644 --- a/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +++ b/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py @@ -45,7 +45,7 @@ aisbench_cases = [{ "dataset_conf": "textvqa/textvqa_gen_base64", "max_out_len": 2048, "batch_size": 128, - "baseline": 81, + "baseline": 82.05, "threshold": 5 }, { "case_type": "performance", diff --git a/tests/e2e/nightly/models/test_qwen3_32b_int8.py b/tests/e2e/nightly/models/test_qwen3_32b_int8.py index e245f3d7..bbaf863a 100644 --- a/tests/e2e/nightly/models/test_qwen3_32b_int8.py +++ b/tests/e2e/nightly/models/test_qwen3_32b_int8.py @@ -58,7 +58,7 @@ aisbench_cases = [{ 
"max_out_len": 32768, "batch_size": 32, "baseline": 83.33, - "threshold": 17 + "threshold": 7 }, { "case_type": "performance", "dataset_path": "vllm-ascend/GSM8K-in3500-bs400",