[Feat] Support Torch Symm Mem AllReduce (#10571)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
Yuan Luo
2025-10-06 04:55:19 +08:00
committed by GitHub
parent 148d8d485d
commit 590f2da052
8 changed files with 466 additions and 1 deletions

View File

@@ -28,6 +28,8 @@ def launch_server(args):
cmd += "--disable-custom-all-reduce"
if args.enable_mscclpp:
cmd += "--enable-mscclpp"
if args.enable_torch_symm_mem:
cmd += "--enable-torch-symm-mem"
print(cmd)
os.system(cmd)
@@ -70,6 +72,11 @@ if __name__ == "__main__":
action="store_true",
help="Enable using mscclpp for small messages for all-reduce kernel and fall back to NCCL.",
)
parser.add_argument(
"--enable-torch-symm-mem",
action="store_true",
help="Enable using torch symm mem for all-reduce kernel and fall back to NCCL.",
)
args = parser.parse_args()
launch_server(args)