[e2e]Fixed the issue that pyhccl e2e cannot run continuously with other tests (#1246)

### What this PR does / why we need it?
1. Fixed the issue where the pyhccl e2e test could not run consecutively with
other tests.
2.Cleaned up the resources occupied by the dynamic_npugraph_batchsize
e2e test.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
This is an e2e test.

The e2e multi-card tests ran successfully locally.


- vLLM version: v0.9.2
- vLLM main:
0df4d9b06b

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
leo-pony
2025-07-29 19:38:30 +08:00
committed by GitHub
parent 61fc35184b
commit 4df8e0027c
2 changed files with 54 additions and 41 deletions

View File

@@ -16,7 +16,9 @@
#
import pytest
import torch
from vllm import LLM, SamplingParams
from vllm import SamplingParams
from tests.e2e.conftest import VllmRunner
MODELS = [
"Qwen/Qwen2.5-0.5B-Instruct",
@@ -38,20 +40,20 @@ prompts = [
def test_models(model: str, tp_size: int, max_tokens: int, temperature: int,
                ignore_eos: bool) -> None:
    """End-to-end generation smoke test for a single model configuration.

    Runs the engine through ``VllmRunner`` used as a context manager so
    that all engine resources are released when the test finishes —
    this is what allows other e2e tests to run consecutively in the
    same session.

    Args:
        model: Hugging Face model identifier to load.
        tp_size: Tensor-parallel world size for the engine.
        max_tokens: Maximum number of tokens to generate per prompt.
        temperature: Sampling temperature.
        ignore_eos: Whether generation continues past the EOS token.
    """
    with VllmRunner(
            model_name=model,
            tensor_parallel_size=tp_size,
    ) as vllm_model:
        # Prepare sampling parameters.
        sampling_params = SamplingParams(
            max_tokens=max_tokens,
            temperature=temperature,
            ignore_eos=ignore_eos,
        )
        # Generate texts from the prompts.
        # The output is a list of RequestOutput objects.
        outputs = vllm_model.generate(prompts, sampling_params)
        # Make sure all queued NPU work has completed before asserting.
        torch.npu.synchronize()
        # The output length should be equal to the prompts length.
        assert len(outputs) == len(prompts)