From 9a1cfb48d4d7d2a57f6f71e6f7c0df5a79c22952 Mon Sep 17 00:00:00 2001
From: jiangyunfan1 <jiangyunfan1@h-partners.com>
Date: Mon, 17 Nov 2025 19:06:54 +0800
Subject: [PATCH] [TEST]Update prefixcache perf threshold for qwen3-32b-int8
 (#4220)

### What this PR does / why we need it?
This PR updates the prefix-cache TTFT threshold for qwen3-32b-int8 from 0.4
to 0.8 (the test now asserts `TTFT75 < 0.8 * TTFT0`), as the baseline has
been improved.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By running the updated nightly test
`tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379

Signed-off-by: jiangyunfan1 <jiangyunfan1@h-partners.com>
---
 .../e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py b/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py
index 90589583..3ee23287 100644
--- a/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py
+++ b/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py
@@ -98,7 +98,7 @@ async def test_models(model: str) -> None:
         run_aisbench_cases(model, port, aisbench_warm_up)
         result = run_aisbench_cases(model, port, aisbench_cases75)
         TTFT75 = get_TTFT(result)
-    assert TTFT75 < 0.4 * TTFT0, f"The TTFT for prefix75 {TTFT75} is not less than 0.4*TTFT for prefix0 {TTFT0}."
+    assert TTFT75 < 0.8 * TTFT0, f"The TTFT for prefix75 {TTFT75} is not less than 0.8*TTFT for prefix0 {TTFT0}."
     print(
-        f"The TTFT for prefix75 {TTFT75} is less than 0.4*TTFT for prefix0 {TTFT0}."
+        f"The TTFT for prefix75 {TTFT75} is less than 0.8*TTFT for prefix0 {TTFT0}."
     )