diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 7c44013b..5292673d 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -280,7 +280,7 @@ class VllmRunner:
         disable_log_stats: bool = True,
         tensor_parallel_size: int = 1,
         block_size: int = 16,
-        enable_chunked_prefill: bool = False,
+        enable_chunked_prefill: bool = True,
         swap_space: int = 4,
         enforce_eager: Optional[bool] = False,
         quantization: Optional[str] = None,
diff --git a/tests/e2e/multicard/test_prefix_caching.py b/tests/e2e/multicard/test_prefix_caching.py
index f16c94b1..114d5d72 100644
--- a/tests/e2e/multicard/test_prefix_caching.py
+++ b/tests/e2e/multicard/test_prefix_caching.py
@@ -58,7 +58,6 @@ INPUT_PROMPTS = [
 ]
 
 
-@pytest.mark.skip(reason="Fix me, the accuracy is not correct")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [50])
 def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
diff --git a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
index aec67bc3..0902fe6d 100644
--- a/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+++ b/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
@@ -118,7 +118,6 @@ def test_eagle_correctness(
     spec_model_name = eagle3_model_name() if use_eagle3 else eagle_model_name()
     with VllmRunner(
             model_name,
-            enable_chunked_prefill=True,
             max_num_seqs=1,
             max_num_batched_tokens=2048,
             gpu_memory_utilization=0.6,