Roll back to use vllm custom allreduce (#3006)

2025-01-20 04:03:15 -08:00
parent dc1881326f
commit 89cd923581
10 changed files with 18 additions and 65 deletions
--- a/python/sglang/srt/distributed/__init__.py
+++ b/python/sglang/srt/distributed/__init__.py
@@ -1,3 +1,3 @@
-from .communication_op import *
-from .parallel_state import *
-from .utils import *
+from sglang.srt.distributed.communication_op import *
+from sglang.srt.distributed.parallel_state import *
+from sglang.srt.distributed.utils import *
--- a/python/sglang/srt/distributed/communication_op.py
+++ b/python/sglang/srt/distributed/communication_op.py
@@ -4,7 +4,7 @@ from typing import Any, Dict, Optional, Union
 import torch
 import torch.distributed

-from .parallel_state import get_tp_group
+from sglang.srt.distributed.parallel_state import get_tp_group


 def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor:
--- a/python/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py
+++ b/python/sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py
@@ -7,7 +7,6 @@ import pickle
 import subprocess
 import sys
 import tempfile
-from functools import lru_cache
 from itertools import product
 from typing import Dict, List, Optional, Sequence

--- a/python/sglang/srt/distributed/device_communicators/pynccl_wrapper.py
+++ b/python/sglang/srt/distributed/device_communicators/pynccl_wrapper.py
@@ -57,7 +57,7 @@ def find_nccl_library() -> str:
            so_file = "librccl.so.1"
        else:
            raise ValueError("NCCL only supports CUDA and ROCm backends.")
-        logger.info("Found nccl from library %s", so_file)
+        logger.debug("Found nccl from library %s", so_file)
    return so_file


--- a/python/sglang/srt/distributed/device_communicators/shm_broadcast.py
+++ b/python/sglang/srt/distributed/device_communicators/shm_broadcast.py
@@ -313,7 +313,7 @@ class MessageQueue:
            remote_subscribe_port=remote_subscribe_port,
        )

-        logger.info("vLLM message queue communication handle: %s", self.handle)
+        logger.debug("Message queue communication handle: %s", self.handle)

    def export_handle(self) -> Handle:
        return self.handle