ACLgraph enable: Test cases revisions for all features (#3388)

### What this PR does / why we need it?
This PR revises the test cases for various features across the repository,
enabling aclgraph in them by switching `enforce_eager` from `True` to `False`.
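
For context, in vLLM the `enforce_eager` flag controls whether graph capture is allowed: `enforce_eager=False` permits graph-mode execution (aclgraph on Ascend), while `True` forces op-by-op eager execution. A minimal sketch of the flag in use, with a placeholder model name and prompt that are not taken from the test suite:

```python
from vllm import LLM, SamplingParams

# Placeholder model for illustration only; the actual tests use their own models.
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct",
          max_model_len=1024,
          enforce_eager=False)  # False allows graph capture (aclgraph on Ascend)

outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=16))
print(outputs[0].outputs[0].text)
```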

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
ut

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: lilinsiman <lilinsiman@gmail.com>
Author: lilinsiman
Date: 2025-10-17 17:15:19 +08:00
Committed by: GitHub
Parent: bf87606932
Commit: 1b424fb7f1
17 changed files with 34 additions and 117 deletions

```diff
@@ -71,7 +71,7 @@ def test_ngram_correctness(
     should be the same when using ngram speculative decoding.
     '''
     pytest.skip("Not current support for the test.")
-    ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=True)
+    ref_llm = LLM(model=model_name, max_model_len=1024, enforce_eager=False)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
     with VllmRunner(model_name,
@@ -82,7 +82,7 @@ def test_ngram_correctness(
                         "num_speculative_tokens": 3,
                     },
                     max_model_len=1024,
-                    enforce_eager=True) as runner:
+                    enforce_eager=False) as runner:
         spec_outputs = runner.model.chat(test_prompts, sampling_config)
     matches = 0
     misses = 0
```
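
For reference, a hedged sketch of what the ngram test pattern looks like after this change. `VllmRunner` is the repo's e2e test helper; its import path, the model name, the prompts, and the `method`/prompt-lookup config fields are assumptions here, since the hunks above elide them:

```python
from vllm import SamplingParams
from tests.e2e.conftest import VllmRunner  # assumed import path for the helper

test_prompts = [[{"role": "user", "content": "Count to five."}]]  # placeholder
sampling_config = SamplingParams(temperature=0.0, max_tokens=32)  # placeholder

with VllmRunner("some/reference-model",          # placeholder model name
                speculative_config={
                    "method": "ngram",           # assumed; elided in the hunk
                    "prompt_lookup_max": 5,      # assumed
                    "prompt_lookup_min": 3,      # assumed
                    "num_speculative_tokens": 3,
                },
                max_model_len=1024,
                enforce_eager=False) as runner:  # graph mode enabled
    spec_outputs = runner.model.chat(test_prompts, sampling_config)
```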
```diff
@@ -111,7 +111,7 @@ def test_eagle_correctness(
     should be the same when using eagle speculative decoding.
     '''
-    ref_llm = LLM(model=model_name, max_model_len=2048, enforce_eager=True)
+    ref_llm = LLM(model=model_name, max_model_len=2048, enforce_eager=False)
     ref_outputs = ref_llm.chat(test_prompts, sampling_config)
     del ref_llm
@@ -129,7 +129,7 @@ def test_eagle_correctness(
                 "max_model_len": 128,
             },
             max_model_len=128,
-            enforce_eager=True,
+            enforce_eager=False,
     ) as runner:
         spec_outputs = runner.model.chat(test_prompts, sampling_config)
```