From 9cbce423ce8d8f54347e01985acd5549959fafb5 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Tue, 24 Jun 2025 10:05:59 +0800 Subject: [PATCH] [MISC] Remove useless patch (#1366) ### What this PR does / why we need it? `stateless_init_dp_group` in vllm works with non-cuda platforms now. Remove this useless patch, which was introduced in vllm-ascend by https://github.com/vllm-project/vllm-ascend/commit/e74331a1ede31c69ec0b1b97bd407d38742caa9c (v0.8.4rc2) vLLM upstream merged: https://github.com/vllm-project/vllm/commit/3e472d882a6071813bf6e683f5b9269e0d1d9678 (v0.8.0) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Signed-off-by: wangxiyuan --- vllm_ascend/patch/__init__.py | 10 ---------- .../patch_common/patch_distributed.py | 20 ------------------- 2 files changed, 30 deletions(-) diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index 59d6035..66a63a4 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -56,16 +56,6 @@ # Need a PR to vllm to support get port from environment. # Future Plan: # Remove those patch when vllm merged them -# 3. `vllm.config.ParallelConfig.ParallelConfig.stateless_init_dp_group` -# Why: -# vLLM use gloo backend by default to initialize stateless dp process gourp, but we want to use hccl here to -# get better performance -# How: -# adopt nccl backend to init process group.(Now we still use gloo, it's just a placeholder, we'll use nccl in the future) -# Related PR (if no, explain why): -# Need a PR to vllm to support more backend. -# Future Plan: -# Remove those patch when vllm support more backend. 
# # * Worker Patch: # =============== diff --git a/vllm_ascend/patch/platform/patch_common/patch_distributed.py b/vllm_ascend/patch/platform/patch_common/patch_distributed.py index d094886..1e9a7b0 100644 --- a/vllm_ascend/patch/platform/patch_common/patch_distributed.py +++ b/vllm_ascend/patch/platform/patch_common/patch_distributed.py @@ -21,10 +21,7 @@ import torch import vllm import vllm.distributed import vllm.envs as envs -from torch.distributed import ProcessGroup from vllm.config import ParallelConfig -from vllm.distributed.utils import \ - stateless_init_torch_distributed_process_group from vllm_ascend.utils import NullHandle, is_310p @@ -65,25 +62,8 @@ def parallel_config_get_dp_port(self) -> int: return port -def stateless_init_dp_group(self) -> "ProcessGroup": - # TODO(Yizhou): Currently we have to set the backend to gloo - # because in vllm.config.ParallelConfig.has_unfinished_dp the - # device is set to cpu. We need to fix this in the future. - # We need to compare the performance of gloo and hccl and then - # decide which one to use. - dp_group = stateless_init_torch_distributed_process_group( - self.data_parallel_master_ip, - self.get_next_dp_init_port(), - self.data_parallel_rank, - self.data_parallel_size, - backend="gloo") - - return dp_group - - vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port -ParallelConfig.stateless_init_dp_group = stateless_init_dp_group def communication_adaptation_310p():