From 9cbce423ce8d8f54347e01985acd5549959fafb5 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Tue, 24 Jun 2025 10:05:59 +0800 Subject: [PATCH] [MISC] Remove useless patch (#1366) ### What this PR does / why we need it? `stateless_init_dp_group` in vllm works with non-cuda platforms now. Remove this useless patch, which was introduced in vllm-ascend by https://github.com/vllm-project/vllm-ascend/commit/e74331a1ede31c69ec0b1b97bd407d38742caa9c (v0.8.4rc2) vLLM upstream merged: https://github.com/vllm-project/vllm/commit/3e472d882a6071813bf6e683f5b9269e0d1d9678 (v0.8.0) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Signed-off-by: wangxiyuan --- vllm_ascend/patch/__init__.py | 10 ---------- .../patch_common/patch_distributed.py | 20 ------------------- 2 files changed, 30 deletions(-) diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index 59d6035..66a63a4 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -56,16 +56,6 @@ # Need a PR to vllm to support get port from environment. # Future Plan: # Remove those patch when vllm merged them -# 3. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group` -# Why: -# vLLM use gloo backend by default to initialize stateless dp process gourp, but we want to use hccl here to -# get better performance -# How: -# adopt nccl backend to init process group.(Now we still use gloo, it's just a placeholder, we'll use nccl in the future) -# Related PR (if no, explain why): -# Need a PR to vllm to support more backend. -# Future Plan: -# Remove those patch when vllm support more backend. 
# # * Worker Patch: # =============== diff --git a/vllm_ascend/patch/platform/patch_common/patch_distributed.py b/vllm_ascend/patch/platform/patch_common/patch_distributed.py index d094886..1e9a7b0 100644 --- a/vllm_ascend/patch/platform/patch_common/patch_distributed.py +++ b/vllm_ascend/patch/platform/patch_common/patch_distributed.py @@ -21,10 +21,7 @@ import torch import vllm import vllm.distributed import vllm.envs as envs -from torch.distributed import ProcessGroup from vllm.config import ParallelConfig -from vllm.distributed.utils import \ - stateless_init_torch_distributed_process_group from vllm_ascend.utils import NullHandle, is_310p @@ -65,25 +62,8 @@ def parallel_config_get_dp_port(self) -> int: return port -def stateless_init_dp_group(self) -> "ProcessGroup": - # TODO(Yizhou): Currently we have to set the backend to gloo - # because in vllm.config.ParallelConfig.has_unfinished_dp the - # device is set to cpu. We need to fix this in the future. - # We need to compare the performance of gloo and hccl and then - # decide which one to use. - dp_group = stateless_init_torch_distributed_process_group( - self.data_parallel_master_ip, - self.get_next_dp_init_port(), - self.data_parallel_rank, - self.data_parallel_size, - backend="gloo") - - return dp_group - - vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port -ParallelConfig.stateless_init_dp_group = stateless_init_dp_group def communication_adaptation_310p():