From e74331a1ede31c69ec0b1b97bd407d38742caa9c Mon Sep 17 00:00:00 2001 From: Pleaplusone <38376071+ganyi1996ppo@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:47:51 +0800 Subject: [PATCH] Add dp initialize patch with hccl backend (#626) ### What this PR does / why we need it? Add dp stateless process group initialization path with hccl backend as vllm-ascend patch. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? --------- Signed-off-by: ganyi --- vllm_ascend/patch/__init__.py | 10 +++++++++- .../platform/patch_common/patch_distributed.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index d4286bd..93c0b0b 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -87,7 +87,15 @@ # Future Plan: # Its a workaround in vllm-ascend to enable multi-node dp inference, maybe removed if vllm have better plan # on multi-node dp inference implementation -# +# 4. 
`ParallelConfig.stateless_init_dp_group` +# Why: +# vLLM uses gloo backend by default to initialize stateless dp process group, but we want to use hccl here to +# get better performance +# How: +# adopt hccl backend to init process group +# Related PR (if no, explain why): no related PR, we want to add this ability into vllm +# Future Plan: +# Remove this patch when vllm merges it # * Worker Patch: # =============== # ** File: worker/patch_0_8_4/patch_metrics.py ** diff --git a/vllm_ascend/patch/platform/patch_common/patch_distributed.py b/vllm_ascend/patch/platform/patch_common/patch_distributed.py index 1b356a9..ce43836 100644 --- a/vllm_ascend/patch/platform/patch_common/patch_distributed.py +++ b/vllm_ascend/patch/platform/patch_common/patch_distributed.py @@ -152,6 +152,21 @@ def parallel_config_get_dp_port(self) -> int: return port +def ascend_stateless_init_dp_group(self) -> "ProcessGroup": + from vllm.distributed.utils import \ + stateless_init_torch_distributed_process_group + + dp_group = stateless_init_torch_distributed_process_group( + self.data_parallel_master_ip, + self.get_next_dp_init_port(), + self.data_parallel_rank, + self.data_parallel_size, + backend="hccl") + + return dp_group + + vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel vllm.distributed.stateless_init_torch_distributed_process_group = ascend_stateless_init_torch_distributed_process_group ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port +ParallelConfig.stateless_init_dp_group = ascend_stateless_init_dp_group