support 1 shot allreduce in 1-node and 2-node using mscclpp (#6277)
@@ -98,11 +98,12 @@ def initialize_dp_attention(
         ],
         local_rank,
         torch.distributed.get_backend(tp_group.device_group),
-        SYNC_TOKEN_IDS_ACROSS_TP,
-        False,
-        False,
-        False,
-        False,
+        use_pynccl=SYNC_TOKEN_IDS_ACROSS_TP,
+        use_pymscclpp=False,
+        use_custom_allreduce=False,
+        use_hpu_communicator=False,
+        use_xpu_communicator=False,
+        use_npu_communicator=False,
         group_name="attention_tp",
     )
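The switch from positional booleans to keyword arguments makes it clear which communicator backend each flag toggles; the diff keeps `use_pymscclpp=False` for the attention TP group. For reference, a caller that wanted the new mscclpp one-shot allreduce path would flip that flag instead. A minimal sketch, assuming the import path in the comment (it mirrors the `GroupCoordinator` keyword signature visible in the diff; the rank layout is illustrative):

import torch.distributed

from sglang.srt.distributed.parallel_state import GroupCoordinator  # assumed path


def build_group_with_mscclpp(group_ranks, local_rank, device_group):
    """Sketch: build a coordinator that opts into the mscclpp allreduce path.

    Not the PR's code; keyword names are taken from the diff above, the
    helper itself is hypothetical.
    """
    return GroupCoordinator(
        group_ranks,                # e.g. [[0, 1, 2, 3]] for one 4-GPU node
        local_rank,
        torch.distributed.get_backend(device_group),
        use_pynccl=False,
        use_pymscclpp=True,         # the flag this PR introduces
        use_custom_allreduce=False,
        use_hpu_communicator=False,
        use_xpu_communicator=False,
        use_npu_communicator=False,
        group_name="attention_tp",
    )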