Reduce computation and communication in DP attention (#4521)

This commit is contained in:
Cheng Wan
2025-03-18 16:41:36 -04:00
committed by GitHub
parent 9e0186f352
commit 3196999f63
5 changed files with 70 additions and 80 deletions

View File

@@ -189,6 +189,9 @@ class GroupCoordinator:
device_group: ProcessGroup # group for device communication
use_pynccl: bool # a hint of whether to use PyNccl
use_custom_allreduce: bool # a hint of whether to use CustomAllreduce
use_message_queue_broadcaster: (
bool # a hint of whether to use message queue broadcaster
)
# communicators are only created for world size > 1
pynccl_comm: Optional[Any] # PyNccl communicator
ca_comm: Optional[Any] # Custom allreduce communicator
@@ -241,6 +244,7 @@ class GroupCoordinator:
self.use_custom_allreduce = use_custom_allreduce
self.use_hpu_communicator = use_hpu_communicator
self.use_xpu_communicator = use_xpu_communicator
self.use_message_queue_broadcaster = use_message_queue_broadcaster
# lazy import to avoid documentation build error
from sglang.srt.distributed.device_communicators.custom_all_reduce import (
@@ -269,7 +273,7 @@ class GroupCoordinator:
HpuCommunicator,
)
self.hpu_communicator: Optional[HpuCommunicator]
self.hpu_communicator: Optional[HpuCommunicator] = None
if use_hpu_communicator and self.world_size > 1:
self.hpu_communicator = HpuCommunicator(group=self.device_group)
@@ -277,7 +281,7 @@ class GroupCoordinator:
XpuCommunicator,
)
self.xpu_communicator: Optional[XpuCommunicator]
self.xpu_communicator: Optional[XpuCommunicator] = None
if use_xpu_communicator and self.world_size > 1:
self.xpu_communicator = XpuCommunicator(group=self.device_group)