Roll back to use vllm custom allreduce (#3006)

This commit is contained in:
Lianmin Zheng
2025-01-20 04:03:15 -08:00
committed by GitHub
parent dc1881326f
commit 89cd923581
10 changed files with 18 additions and 65 deletions

View File

@@ -1,3 +1,3 @@
from .communication_op import *
from .parallel_state import *
from .utils import *
from sglang.srt.distributed.communication_op import *
from sglang.srt.distributed.parallel_state import *
from sglang.srt.distributed.utils import *

View File

@@ -4,7 +4,7 @@ from typing import Any, Dict, Optional, Union
import torch
import torch.distributed
from .parallel_state import get_tp_group
from sglang.srt.distributed.parallel_state import get_tp_group
def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor:

View File

@@ -7,7 +7,6 @@ import pickle
import subprocess
import sys
import tempfile
from functools import lru_cache
from itertools import product
from typing import Dict, List, Optional, Sequence

View File

@@ -57,7 +57,7 @@ def find_nccl_library() -> str:
so_file = "librccl.so.1"
else:
raise ValueError("NCCL only supports CUDA and ROCm backends.")
logger.info("Found nccl from library %s", so_file)
logger.debug("Found nccl from library %s", so_file)
return so_file

View File

@@ -313,7 +313,7 @@ class MessageQueue:
remote_subscribe_port=remote_subscribe_port,
)
logger.info("vLLM message queue communication handle: %s", self.handle)
logger.debug("Message queue communication handle: %s", self.handle)
def export_handle(self) -> Handle:
return self.handle