Fix e2e data parallel test: add resource release code (#1881)

### What this PR does / why we need it?
Fix the e2e data parallel test: add resource release code and give engines more time to pause their processing loops before exiting.
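
For reference, the release path this PR adds follows a common vLLM offline-inference teardown pattern: drop the `LLM` handle, destroy the parallel state, then reclaim device memory. Below is a minimal annotated sketch of the helper introduced in the diff; the explicit `torch_npu` import is an assumption so the snippet runs standalone (inside vllm-ascend the NPU backend is already registered):

```python
import contextlib
import gc

import torch
import torch_npu  # noqa: F401  (assumed: registers the torch.npu backend)
from vllm.distributed.parallel_state import (
    destroy_distributed_environment, destroy_model_parallel)


def cleanup_env_and_memory():
    # Tear down vLLM's model-parallel groups, then the global
    # distributed environment.
    destroy_model_parallel()
    destroy_distributed_environment()
    # destroy_process_group() asserts if no default group exists;
    # suppressing keeps the helper safe to call unconditionally.
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    # Drop Python-side references first, then release cached NPU blocks.
    gc.collect()
    torch.npu.empty_cache()
    torch.npu.reset_peak_memory_stats()


# Usage at the end of the example script: release the engine, then clean up.
# del llm
# cleanup_env_and_memory()
```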

### Does this PR introduce _any_ user-facing change?
No

- vLLM version: v0.9.2
- vLLM main: 5895afd780

Signed-off-by: leo-pony <nengjunma@outlook.com>
leo-pony authored 2025-07-19 11:39:48 +08:00, committed by GitHub
parent b824525be3
commit 2ee90461d0
2 changed files with 18 additions and 4 deletions


```diff
@@ -56,14 +56,19 @@ Multi-node:
 import os
 from time import sleep
 
+import contextlib
+import gc
+import torch
 from vllm import LLM, SamplingParams
 from vllm.utils import get_open_port
+from vllm.distributed.parallel_state import (  # noqa E402
+    destroy_distributed_environment, destroy_model_parallel)
 
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 
 
 def parse_args():
     import argparse
@@ -110,6 +115,15 @@ def parse_args():
     return parser.parse_args()
 
 
+def cleanup_env_and_memory():
+    destroy_model_parallel()
+    destroy_distributed_environment()
+    with contextlib.suppress(AssertionError):
+        torch.distributed.destroy_process_group()
+    gc.collect()
+    torch.npu.empty_cache()
+    torch.npu.reset_peak_memory_stats()
+
+
 def main(
     model,
     dp_size,
@@ -185,8 +199,9 @@ def main(
               f"Generated text: {generated_text!r}")
 
     # Give engines time to pause their processing loops before exiting.
-    sleep(1)
+    sleep(5)
+    del llm
+    cleanup_env_and_memory()
 
 
 if __name__ == "__main__":
     args = parse_args()
```


```diff
@@ -30,7 +30,6 @@ import pytest
 MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
 
 
-@pytest.mark.skipif(True, reason="TODO: fix dp timeout error in ci")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [32])
 @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})
```