From 2ee90461d0dc28cd8823338600d4af718b389510 Mon Sep 17 00:00:00 2001 From: leo-pony Date: Sat, 19 Jul 2025 11:39:48 +0800 Subject: [PATCH] Fix e2e data parallel test: add resource release code (#1881) ### What this PR does / why we need it? Fix e2e data parallel test: add resource release code and give the engines more time to pause their processing loops before exiting. ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.9.2 - vLLM main: https://github.com/vllm-project/vllm/commit/5895afd78047614a037cac1fc4634825c749fd59 Signed-off-by: leo-pony --- examples/offline_data_parallel.py | 21 ++++++++++++++++++--- tests/e2e/multicard/test_data_parallel.py | 1 - 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/examples/offline_data_parallel.py b/examples/offline_data_parallel.py index 024ef98..e497a13 100644 --- a/examples/offline_data_parallel.py +++ b/examples/offline_data_parallel.py @@ -56,14 +56,19 @@ Multi-node: import os from time import sleep +import contextlib +import gc + +import torch from vllm import LLM, SamplingParams from vllm.utils import get_open_port +from vllm.distributed.parallel_state import ( # noqa E402 + destroy_distributed_environment, destroy_model_parallel) os.environ["VLLM_USE_MODELSCOPE"] = "True" os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" - def parse_args(): import argparse @@ -110,6 +115,15 @@ def parse_args(): return parser.parse_args() +def cleanup_env_and_memory(): + destroy_model_parallel() + destroy_distributed_environment() + with contextlib.suppress(AssertionError): + torch.distributed.destroy_process_group() + gc.collect() + torch.npu.empty_cache() + torch.npu.reset_peak_memory_stats() + def main( model, dp_size, @@ -185,8 +199,9 @@ def main( f"Generated text: {generated_text!r}") # Give engines time to pause their processing loops before exiting. 
- sleep(1) - + sleep(5) + del llm + cleanup_env_and_memory() if __name__ == "__main__": args = parse_args() diff --git a/tests/e2e/multicard/test_data_parallel.py b/tests/e2e/multicard/test_data_parallel.py index 57f14ac..0b945e0 100644 --- a/tests/e2e/multicard/test_data_parallel.py +++ b/tests/e2e/multicard/test_data_parallel.py @@ -30,7 +30,6 @@ import pytest MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"] -@pytest.mark.skipif(True, reason="TODO: fix dp timeout error in ci") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("max_tokens", [32]) @patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})