ACLgraph enable: Test cases revisions for all features (#3388)

### What this PR does / why we need it?
This PR revises the test cases for various features across the repository,
enabling aclgraph in them by switching `enforce_eager` from `True` to `False`.
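
For context, in vLLM the `enforce_eager` flag controls whether graph capture is allowed: `enforce_eager=False` permits graph-mode execution (aclgraph on Ascend), while `True` forces op-by-op eager execution. A minimal sketch of the flag in use, with a placeholder model name and prompt that are not taken from the test suite:

```python
from vllm import LLM, SamplingParams

# Placeholder model for illustration only; the actual tests use their own models.
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct",
          max_model_len=1024,
          enforce_eager=False)  # False allows graph capture (aclgraph on Ascend)

outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=16))
print(outputs[0].outputs[0].text)
```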

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
ut

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
Author: lilinsiman
Date: 2025-10-17 17:15:19 +08:00
Committed by: GitHub
Parent: bf87606932
Commit: 1b424fb7f1
17 changed files with 34 additions and 117 deletions

```diff
@@ -71,7 +71,7 @@ def test_ngram_correctness(
     should be the same when using ngram speculative decoding.
     '''
     pytest.skip("Not current support for the test.")
-    ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=True)
+    ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
     with VllmRunner(model_name,
@@ -82,7 +82,7 @@ def test_ngram_correctness(
                         "num_speculative_tokens": 3,
                     },
                     max_model_len=1024,
-                    enforce_eager=True) as runner:
+                    enforce_eager=False) as runner:
         spec_outputs = runner.model.chat(test_prompts, sampling_config)
     matches = 0
     misses = 0
```
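
For reference, a hedged sketch of what the ngram test pattern looks like after this change. `VllmRunner` is the repo's e2e test helper; its import path, the model name, the prompts, and the `method`/prompt-lookup config fields are assumptions here, since the hunks above elide them:

```python
from vllm import SamplingParams
from tests.e2e.conftest import VllmRunner  # assumed import path for the helper

test_prompts = [[{"role": "user", "content": "Count to five."}]]  # placeholder
sampling_config = SamplingParams(temperature=0.0, max_tokens=32)  # placeholder

with VllmRunner("some/reference-model",          # placeholder model name
                speculative_config={
                    "method": "ngram",           # assumed; elided in the hunk
                    "prompt_lookup_max": 5,      # assumed
                    "prompt_lookup_min": 3,      # assumed
                    "num_speculative_tokens": 3,
                },
                max_model_len=1024,
                enforce_eager=False) as runner:  # graph mode enabled
    spec_outputs = runner.model.chat(test_prompts, sampling_config)
```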
```diff
@@ -111,7 +111,7 @@ def test_eagle_correctness(
     should be the same when using eagle speculative decoding.
     '''
-    ref_llm = LLM(model=model_name, max_model_len=2048, enforce_eager=True)
+    ref_llm = LLM(model=model_name, max_model_len=2048, enforce_eager=False)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
@@ -129,7 +129,7 @@ def test_eagle_correctness(
                 "max_model_len": 128,
             },
             max_model_len=128,
-            enforce_eager=True,
+            enforce_eager=False,
     ) as runner:
         spec_outputs = runner.model.chat(test_prompts, sampling_config)
```