Reduce computation and communication in DP attention (#4521)
This commit is contained in:
@@ -189,6 +189,9 @@ class GroupCoordinator:
|
||||
device_group: ProcessGroup # group for device communication
|
||||
use_pynccl: bool # a hint of whether to use PyNccl
|
||||
use_custom_allreduce: bool # a hint of whether to use CustomAllreduce
|
||||
use_message_queue_broadcaster: (
|
||||
bool # a hint of whether to use message queue broadcaster
|
||||
)
|
||||
# communicators are only created for world size > 1
|
||||
pynccl_comm: Optional[Any] # PyNccl communicator
|
||||
ca_comm: Optional[Any] # Custom allreduce communicator
|
||||
@@ -241,6 +244,7 @@ class GroupCoordinator:
|
||||
self.use_custom_allreduce = use_custom_allreduce
|
||||
self.use_hpu_communicator = use_hpu_communicator
|
||||
self.use_xpu_communicator = use_xpu_communicator
|
||||
self.use_message_queue_broadcaster = use_message_queue_broadcaster
|
||||
|
||||
# lazy import to avoid documentation build error
|
||||
from sglang.srt.distributed.device_communicators.custom_all_reduce import (
|
||||
@@ -269,7 +273,7 @@ class GroupCoordinator:
|
||||
HpuCommunicator,
|
||||
)
|
||||
|
||||
self.hpu_communicator: Optional[HpuCommunicator]
|
||||
self.hpu_communicator: Optional[HpuCommunicator] = None
|
||||
if use_hpu_communicator and self.world_size > 1:
|
||||
self.hpu_communicator = HpuCommunicator(group=self.device_group)
|
||||
|
||||
@@ -277,7 +281,7 @@ class GroupCoordinator:
|
||||
XpuCommunicator,
|
||||
)
|
||||
|
||||
self.xpu_communicator: Optional[XpuCommunicator]
|
||||
self.xpu_communicator: Optional[XpuCommunicator] = None
|
||||
if use_xpu_communicator and self.world_size > 1:
|
||||
self.xpu_communicator = XpuCommunicator(group=self.device_group)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user