Add back data parallelism (#1635)

2024-10-11 07:22:48 -07:00
parent 5d09ca5735
commit 23cc66f7b6
7 changed files with 228 additions and 39 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -574,7 +574,7 @@ class ServerArgs:
            self.tp_size % self.nnodes == 0
        ), "tp_size must be divisible by number of nodes"
        assert not (
-            self.dp_size > 1 and self.node_rank is not None
+            self.dp_size > 1 and self.nnodes != 1
        ), "multi-node data parallel is not supported"
        assert (
            self.max_loras_per_batch > 0
@@ -583,11 +583,6 @@ class ServerArgs:
            and (self.lora_paths is None or self.disable_radix_cache)
        ), "compatibility of lora and cuda graph and radix attention is in progress"

-        assert self.dp_size == 1, (
-            "The support for data parallelism is temporarily disabled during refactor. "
-            "Please use sglang<=0.3.2 or wait for later updates."
-        )
-
        if isinstance(self.lora_paths, list):
            lora_paths = self.lora_paths
            self.lora_paths = {}
@@ -626,8 +621,8 @@ class PortArgs:
    # The ipc filename for detokenizer to receive inputs from scheduler (zmq)
    detokenizer_ipc_name: str

-    # The port for nccl initialization for multiple TP groups (torch.dist)
-    nccl_ports: List[int]
+    # The port for nccl initialization (torch.dist)
+    nccl_port: int

    @staticmethod
    def init_new(server_args) -> "PortArgs":
@@ -641,7 +636,7 @@ class PortArgs:
            tokenizer_ipc_name=tempfile.NamedTemporaryFile(delete=False).name,
            scheduler_input_ipc_name=tempfile.NamedTemporaryFile(delete=False).name,
            detokenizer_ipc_name=tempfile.NamedTemporaryFile(delete=False).name,
-            nccl_ports=[port],
+            nccl_port=port,
        )