From 981a14f8d536a073cde064c6f00c1b49f2c92b4b Mon Sep 17 00:00:00 2001
From: wangxiyuan
Date: Tue, 2 Dec 2025 08:54:34 +0800
Subject: [PATCH] [CI] enable chunked prefill by default (#4569)

Set `enable_chunked_prefill` to True for the e2e tests by default to keep
the same behavior as vLLM.

- vLLM version: v0.11.2

Signed-off-by: wangxiyuan
---
 tests/e2e/conftest.py                                      | 2 +-
 tests/e2e/multicard/test_prefix_caching.py                 | 1 -
 tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 7c44013b..5292673d 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -280,7 +280,7 @@ class VllmRunner:
         disable_log_stats: bool = True,
         tensor_parallel_size: int = 1,
         block_size: int = 16,
-        enable_chunked_prefill: bool = False,
+        enable_chunked_prefill: bool = True,
         swap_space: int = 4,
         enforce_eager: Optional[bool] = False,
         quantization: Optional[str] = None,
diff --git a/tests/e2e/multicard/test_prefix_caching.py b/tests/e2e/multicard/test_prefix_caching.py
index f16c94b1..114d5d72 100644
--- a/tests/e2e/multicard/test_prefix_caching.py
+++ b/tests/e2e/multicard/test_prefix_caching.py
@@ -58,7 +58,6 @@ INPUT_PROMPTS = [
 ]
 
 
-@pytest.mark.skip(reason="Fix me, the accuracy is not correct")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [50])
 def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
index aec67bc3..0902fe6d 100644
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
@@ -118,7 +118,6 @@ def test_eagle_correctness(
     spec_model_name = eagle3_model_name() if use_eagle3 else eagle_model_name()
     with VllmRunner(
             model_name,
-            enable_chunked_prefill=True,
             max_num_seqs=1,
             max_num_batched_tokens=2048,
             gpu_memory_utilization=0.6,
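
Usage note: with this default flipped, every test built on `VllmRunner` now runs
with chunked prefill enabled unless it opts out explicitly. A minimal sketch of
opting out, assuming `VllmRunner` from `tests/e2e/conftest.py` is used as a
context manager and mirrors vLLM's own test-runner API (the model name and the
`generate_greedy` helper are assumptions, not confirmed by this patch):

    # Hypothetical test snippet. VllmRunner comes from tests/e2e/conftest.py;
    # with this patch its default is enable_chunked_prefill=True, so only a
    # test that needs the legacy behavior passes the flag explicitly.
    from tests.e2e.conftest import VllmRunner  # assumed import path

    def test_without_chunked_prefill() -> None:
        with VllmRunner("Qwen/Qwen2.5-0.5B-Instruct",  # placeholder model
                        enable_chunked_prefill=False) as runner:
            # generate_greedy(prompts, max_tokens) is assumed to follow
            # vLLM's test utilities.
            outputs = runner.generate_greedy(["Hello, my name is"], 32)
            assert len(outputs) == 1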