[AMD] switch to custom allreduce regardless of MSCCL setting on ROCm (#6097)
This commit is contained in:
@@ -296,7 +296,6 @@ class CustomAllreduce:
|
|||||||
self.meta, self.rank_data, handles, offsets, rank, self.full_nvlink
|
self.meta, self.rank_data, handles, offsets, rank, self.full_nvlink
|
||||||
)
|
)
|
||||||
self.register_buffer(self.buffer)
|
self.register_buffer(self.buffer)
|
||||||
self.MSCCL = os.getenv("RCCL_MSCCL_ENABLE", "1") == "1"
|
|
||||||
|
|
||||||
self.disabled = False
|
self.disabled = False
|
||||||
|
|
||||||
@@ -430,12 +429,6 @@ class CustomAllreduce:
|
|||||||
|
|
||||||
if _is_hip:
|
if _is_hip:
|
||||||
if self.full_nvlink:
|
if self.full_nvlink:
|
||||||
if self.world_size == 8:
|
|
||||||
if self.MSCCL:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return inp_size < self.max_size
|
|
||||||
else:
|
|
||||||
return inp_size < self.max_size
|
return inp_size < self.max_size
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user