Add note for deepseek related docs and remove unnecessary comments (#590)

### What this PR does / why we need it? Add notes for deepseek's patch and remove some of the unnecessary comments --------- Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
2025-04-22 09:59:09 +08:00
parent c5850d302d
commit d12a057df8
6 changed files with 78 additions and 199 deletions
--- a/vllm_ascend/patch/platform/patch_0_8_4/patch_distributed.py
+++ b/vllm_ascend/patch/platform/patch_0_8_4/patch_distributed.py
@@ -1,3 +1,22 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Adapted from vllm/model_executor/models/qwen2_vl.py
+# This file is a part of the vllm-ascend project.
+
 import torch
 import vllm
 import vllm.distributed
@@ -8,6 +27,40 @@ from torch.distributed.distributed_c10d import (Backend, PrefixStore,
 from torch.distributed.rendezvous import rendezvous
 from vllm.config import ParallelConfig

+# What's Patched and how it works:
+# ** File: platform/patch_0_8_4/patch_distributed.py**
+#   1. `vllm.distributed.parallel_state.destroy_model_parallel()`
+#    Why:
+#       vllm dose not support outside platform maintain its own `CoordinatorGroup`, vllm-ascend maintain EP and ETP
+#       inside of the repo, and needs a common interface to destroy them, this patch add the interface of destroy
+#       platform owned `CoordinatorGroup` to make sure all the CoordinateGroup can be properly destroyed
+#    How：
+#       Call platform method `destroy_platform_model_parallel` to destroy all the `CoordinateGroup`
+#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
+#    Future Plan:
+#       Remove those patch when vllm merged them
+#   2. `vllm.distributed.stateless_init_torch_distributed_process_group()`
+#    Why:
+#       The stateless process group can not be initialized except from gloo and nccl backend, vllm-ascend
+#       needs to initialize its own stateless process group for communication, so we add the platform related
+#       call to the `stateless_init_torch_distributed_process_group`, to enable other platform which may support
+#       stateless process group initialize method
+#    How：
+#       Call platform method `platform_has_backend_register` to judge if there is a stateless process group initialize
+#       method and call platform method `platform_register_backend` to initialize them
+#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
+#    Future Plan:
+#       Remove those patch when vllm merged them
+#   3. `ParallelConfig.get_next_dp_init_port`
+#    Why:
+#       We want to get dp port from env variable, so the multi-node inference can be properly initialized and run.
+#    How：
+#       Get the dp port from env variable enable multi-mode dp inference
+#    Related PR (if no, explain why): no related PR, we want add this ability into vllm
+#    Future Plan:
+#       Its a workaround in vllm-ascend to enable multi-node dp inference, maybe removed if vllm have better plan
+#       on multi-node dp inference implementation
+

 def ascend_destroy_model_parallel():
    """Set the groups to none and destroy them."""