Add back data parallelism (#1635)

This commit is contained in:
Lianmin Zheng
2024-10-11 07:22:48 -07:00
committed by GitHub
parent 5d09ca5735
commit 23cc66f7b6
7 changed files with 228 additions and 39 deletions

View File

@@ -574,7 +574,7 @@ class ServerArgs:
self.tp_size % self.nnodes == 0
), "tp_size must be divisible by number of nodes"
assert not (
self.dp_size > 1 and self.node_rank is not None
self.dp_size > 1 and self.nnodes != 1
), "multi-node data parallel is not supported"
assert (
self.max_loras_per_batch > 0
@@ -583,11 +583,6 @@ class ServerArgs:
and (self.lora_paths is None or self.disable_radix_cache)
), "compatibility of lora and cuda graph and radix attention is in progress"
assert self.dp_size == 1, (
"The support for data parallelism is temporarily disabled during refactor. "
"Please use sglang<=0.3.2 or wait for later updates."
)
if isinstance(self.lora_paths, list):
lora_paths = self.lora_paths
self.lora_paths = {}
@@ -626,8 +621,8 @@ class PortArgs:
# The IPC filename on which the detokenizer receives inputs from the scheduler (ZeroMQ)
detokenizer_ipc_name: str
# The port for nccl initialization for multiple TP groups (torch.dist)
nccl_ports: List[int]
# The port used for NCCL initialization (torch.distributed)
nccl_port: int
@staticmethod
def init_new(server_args) -> "PortArgs":
@@ -641,7 +636,7 @@ class PortArgs:
tokenizer_ipc_name=tempfile.NamedTemporaryFile(delete=False).name,
scheduler_input_ipc_name=tempfile.NamedTemporaryFile(delete=False).name,
detokenizer_ipc_name=tempfile.NamedTemporaryFile(delete=False).name,
nccl_ports=[port],
nccl_port=port,
)