ACLgraph enable: Test cases revisions for all features (#3388)

### What this PR does / why we need it?
This PR revises the test cases for various features in the repository,
enabling aclgraph in those test cases.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
Unit tests (UT)

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
This commit is contained in:
lilinsiman
2025-10-17 17:15:19 +08:00
committed by GitHub
parent bf87606932
commit 1b424fb7f1
17 changed files with 34 additions and 117 deletions

View File

@@ -62,7 +62,7 @@ INPUT_PROMPTS = [
@pytest.mark.parametrize("max_tokens", [50])
def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
with VllmRunner(model,
enforce_eager=True,
enforce_eager=False,
max_model_len=2048,
tensor_parallel_size=2,
gpu_memory_utilization=0.7) as vllm_model:
@@ -71,7 +71,7 @@ def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
with VllmRunner(model,
enable_prefix_caching=False,
enforce_eager=True,
enforce_eager=False,
max_model_len=2048,
tensor_parallel_size=2,
gpu_memory_utilization=0.7) as vllm_model:
@@ -96,7 +96,7 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
'enabled': True,
},
},
enforce_eager=True,
enforce_eager=False,
max_model_len=2048,
tensor_parallel_size=2,
gpu_memory_utilization=0.7) as vllm_model:
@@ -109,7 +109,7 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
'enable_prefix_caching': True,
},
},
enforce_eager=True,
enforce_eager=False,
max_model_len=2048,
tensor_parallel_size=2,
gpu_memory_utilization=0.7) as vllm_model: