From 9a1cfb48d4d7d2a57f6f71e6f7c0df5a79c22952 Mon Sep 17 00:00:00 2001 From: jiangyunfan1 Date: Mon, 17 Nov 2025 19:06:54 +0800 Subject: [PATCH] [TEST]Update prefixcache perf threshold for qwen3-32b-int8 (#4220) ### What this PR does / why we need it? This PR updates the prefixcache threshold for qwen3-32b-int8 from 0.4 to 0.8, as the baseline has been improved. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the test - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 Signed-off-by: jiangyunfan1 --- .../e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py b/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py index 90589583..3ee23287 100644 --- a/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py +++ b/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py @@ -98,7 +98,7 @@ async def test_models(model: str) -> None: run_aisbench_cases(model, port, aisbench_warm_up) result = run_aisbench_cases(model, port, aisbench_cases75) TTFT75 = get_TTFT(result) - assert TTFT75 < 0.4 * TTFT0, f"The TTFT for prefix75 {TTFT75} is not less than 0.4*TTFT for prefix0 {TTFT0}." + assert TTFT75 < 0.8 * TTFT0, f"The TTFT for prefix75 {TTFT75} is not less than 0.8*TTFT for prefix0 {TTFT0}." print( - f"The TTFT for prefix75 {TTFT75} is less than 0.4*TTFT for prefix0 {TTFT0}." + f"The TTFT for prefix75 {TTFT75} is less than 0.8*TTFT for prefix0 {TTFT0}." )