[Feature] add disable-custom-all-reduce (#1148)

Co-authored-by: chenxu02 <chenxu02@zhihu.com>
Co-authored-by: Yineng Zhang <me@zhyncs.com>
2024-08-20 23:44:12 +08:00
parent a8ae640328
commit ff2cfdb1a2
2 changed files with 9 additions and 0 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -86,6 +86,7 @@ class ServerArgs:
    enable_mla: bool = False
    attention_reduce_in_fp32: bool = False
    efficient_weight_load: bool = False
+    disable_custom_all_reduce: bool = False

    # Distributed args
    nccl_init_addr: Optional[str] = None
@@ -428,6 +429,12 @@ class ServerArgs:
            action="store_true",
            help="Turn on memory efficient weight loading with quantization (quantize per layer during loading).",
        )
+        parser.add_argument(
+            "--disable-custom-all-reduce",
+            action="store_true",
+            default=False,
+            help="Disable the custom all-reduce kernel and fall back to NCCL.",
+        )

    @classmethod
    def from_cli_args(cls, args: argparse.Namespace):