Add support for NCCL symmetric memory for TP allreduces (#8238)

2025-08-01 18:30:55 -05:00
parent b89d37cb11
commit 82e6c3a65a
13 changed files with 266 additions and 30 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -218,6 +218,7 @@ class ServerArgs:
    enable_profile_cuda_graph: bool = False
    enable_cudagraph_gc: bool = False
    enable_nccl_nvls: bool = False
+    enable_symm_mem: bool = False
    enable_tokenizer_batch_encode: bool = False
    disable_outlines_disk_cache: bool = False
    disable_custom_all_reduce: bool = False
@@ -1599,6 +1600,11 @@ class ServerArgs:
            action="store_true",
            help="Enable NCCL NVLS for prefill heavy requests when available.",
        )
+        parser.add_argument(
+            "--enable-symm-mem",
+            action="store_true",
+            help="Enable NCCL symmetric memory for fast collectives.",
+        )
        parser.add_argument(
            "--enable-tokenizer-batch-encode",
            action="store_true",