[CI] Enable chunked prefill by default (#4569)

Set `enable_chunked_prefill` to True by default for e2e tests to keep the
same behavior as vLLM.

- vLLM version: v0.11.2

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-02 08:54:34 +08:00
committed by GitHub
parent 6b9a997076
commit 981a14f8d5
3 changed files with 1 additions and 3 deletions

View File

@@ -280,7 +280,7 @@ class VllmRunner:
disable_log_stats: bool = True,
tensor_parallel_size: int = 1,
block_size: int = 16,
enable_chunked_prefill: bool = False,
enable_chunked_prefill: bool = True,
swap_space: int = 4,
enforce_eager: Optional[bool] = False,
quantization: Optional[str] = None,