Enable ACLGraph: revise test cases for all features (#3388)

### What this PR does / why we need it?
This PR revises the test cases for various features in the repository so that they run with aclgraph enabled.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
UT.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
2025-10-17 17:15:19 +08:00
parent bf87606932
commit 1b424fb7f1
17 changed files with 34 additions and 117 deletions
--- a/tests/e2e/multicard/test_prefix_caching.py
+++ b/tests/e2e/multicard/test_prefix_caching.py
@@ -62,7 +62,7 @@ INPUT_PROMPTS = [
@pytest.mark.parametrize("max_tokens", [50])
 def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
    with VllmRunner(model,
-                    enforce_eager=True,
+                    enforce_eager=False,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model:
@@ -71,7 +71,7 @@ def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:

    with VllmRunner(model,
                    enable_prefix_caching=False,
-                    enforce_eager=True,
+                    enforce_eager=False,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model:
@@ -96,7 +96,7 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
                            'enabled': True,
                        },
                    },
-                    enforce_eager=True,
+                    enforce_eager=False,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model:
@@ -109,7 +109,7 @@ def test_prefix_cache_with_ascend_scheduler(model: str,
                            'enable_prefix_caching': True,
                        },
                    },
-                    enforce_eager=True,
+                    enforce_eager=False,
                    max_model_len=2048,
                    tensor_parallel_size=2,
                    gpu_memory_utilization=0.7) as vllm_model: