[MISC] Remove useless patch (#1366)

### What this PR does / why we need it?
`stateless_init_dp_group` in vllm works with non-CUDA platforms now.
Remove this useless patch.

Which was introduced in vllm-ascend by
e74331a1ed
(v0.8.4rc2)
vLLM upstream merged:
3e472d882a
(v0.8.0)

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-06-24 10:05:59 +08:00
committed by GitHub
parent 5177bef87a
commit 9cbce423ce
2 changed files with 0 additions and 30 deletions

View File

@@ -56,16 +56,6 @@
# Need a PR to vllm to support get port from environment. # Need a PR to vllm to support get port from environment.
# Future Plan: # Future Plan:
# Remove those patch when vllm merged them # Remove those patch when vllm merged them
# 3. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group`
# Why:
#     vLLM uses the gloo backend by default to initialize the stateless dp process group, but we want to use hccl here to
# get better performance
#   How:
# adopt nccl backend to init process group.(Now we still use gloo, it's just a placeholder, we'll use nccl in the future)
# Related PR (if no, explain why):
# Need a PR to vllm to support more backend.
# Future Plan:
# Remove those patch when vllm support more backend.
# #
# * Worker Patch: # * Worker Patch:
# =============== # ===============

View File

@@ -21,10 +21,7 @@ import torch
import vllm import vllm
import vllm.distributed import vllm.distributed
import vllm.envs as envs import vllm.envs as envs
from torch.distributed import ProcessGroup
from vllm.config import ParallelConfig from vllm.config import ParallelConfig
from vllm.distributed.utils import \
stateless_init_torch_distributed_process_group
from vllm_ascend.utils import NullHandle, is_310p from vllm_ascend.utils import NullHandle, is_310p
@@ -65,25 +62,8 @@ def parallel_config_get_dp_port(self) -> int:
return port return port
def stateless_init_dp_group(self) -> "ProcessGroup":
# TODO(Yizhou): Currently we have to set the backend to gloo
# because in vllm.config.ParallelConfig.has_unfinished_dp the
# device is set to cpu. We need to fix this in the future.
# We need to compare the performance of gloo and hccl and then
# decide which one to use.
dp_group = stateless_init_torch_distributed_process_group(
self.data_parallel_master_ip,
self.get_next_dp_init_port(),
self.data_parallel_rank,
self.data_parallel_size,
backend="gloo")
return dp_group
vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel
ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port
ParallelConfig.stateless_init_dp_group = stateless_init_dp_group
def communication_adaptation_310p(): def communication_adaptation_310p():