Revert "Revert the changes on NCCL symmetric memory" (#10238)

This commit is contained in:
Lianmin Zheng
2025-09-09 12:11:49 -07:00
committed by GitHub
parent d352c29aa0
commit 4582931ac3
5 changed files with 43 additions and 7 deletions

View File

@@ -510,6 +510,17 @@ class GroupCoordinator:
if self.npu_communicator is not None and not self.npu_communicator.disabled:
return self.npu_communicator.all_reduce(input_)
if (
self.pynccl_comm is not None
and hasattr(input_, "symmetric_memory")
and input_.symmetric_memory
):
with self.pynccl_comm.change_state(
enable=True, stream=torch.cuda.current_stream()
):
self.pynccl_comm.all_reduce(input_)
return input_
outplace_all_reduce_method = None
if (
self.qr_comm is not None