Fix e2e data parallel test: add resource release code (#1881)

### What this PR does / why we need it?
Fix e2e data parallel test: add resource release code and give the engines more
time to pause their processing loops before exiting.

### Does this PR introduce _any_ user-facing change?
No

- vLLM version: v0.9.2
- vLLM main:
5895afd780

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
leo-pony
2025-07-19 11:39:48 +08:00
committed by GitHub
parent b824525be3
commit 2ee90461d0
2 changed files with 18 additions and 4 deletions

View File

@@ -56,14 +56,19 @@ Multi-node:
import os
from time import sleep
import contextlib
import gc
import torch
from vllm import LLM, SamplingParams
from vllm.utils import get_open_port
from vllm.distributed.parallel_state import ( # noqa E402
destroy_distributed_environment, destroy_model_parallel)
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
def parse_args():
import argparse
@@ -110,6 +115,15 @@ def parse_args():
return parser.parse_args()
def cleanup_env_and_memory():
    """Tear down the distributed environment and release NPU memory.

    Called after inference finishes so the e2e data-parallel test exits
    cleanly without leaking process groups or device memory.
    """
    # Destroy vLLM's model-parallel groups first, then the wider
    # distributed environment they were created under.
    destroy_model_parallel()
    destroy_distributed_environment()
    # destroy_process_group raises an AssertionError when the default
    # process group was never initialized; suppress it so this cleanup
    # is safe to call unconditionally.
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    gc.collect()
    # Release cached NPU allocator blocks and reset the peak-memory
    # counters so subsequent runs start from a clean memory baseline.
    torch.npu.empty_cache()
    torch.npu.reset_peak_memory_stats()
def main(
model,
dp_size,
@@ -185,8 +199,9 @@ def main(
f"Generated text: {generated_text!r}")
# Give engines time to pause their processing loops before exiting.
sleep(1)
sleep(5)
del llm
cleanup_env_and_memory()
if __name__ == "__main__":
args = parse_args()