[Feat] Support Torch Symm Mem AllReduce (#10571)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
2025-10-06 04:55:19 +08:00
parent 148d8d485d
commit 590f2da052
8 changed files with 466 additions and 1 deletions
--- a/benchmark/lora/launch_server.py
+++ b/benchmark/lora/launch_server.py
@@ -28,6 +28,8 @@ def launch_server(args):
        cmd += "--disable-custom-all-reduce"
    if args.enable_mscclpp:
        cmd += "--enable-mscclpp"
+    if args.enable_torch_symm_mem:
+        cmd += "--enable-torch-symm-mem"
    print(cmd)
    os.system(cmd)

@@ -70,6 +72,11 @@ if __name__ == "__main__":
        action="store_true",
        help="Enable using mscclpp for small messages for all-reduce kernel and fall back to NCCL.",
    )
+    parser.add_argument(
+        "--enable-torch-symm-mem",
+        action="store_true",
+        help="Enable using torch symm mem for all-reduce kernel and fall back to NCCL.",
+    )
    args = parser.parse_args()

    launch_server(args)