[PD] Release initial code (#4654)

Co-authored-by: SangBin Cho <rkooo567@gmail.com> Co-authored-by: Ying1123 <sqy1415@gmail.com> Co-authored-by: merrymercy <lianminzheng@gmail.com> Co-authored-by: makro Co-authored-by: dhou-xai
2025-03-21 14:47:47 -07:00
parent 417fc72f6f
commit c7c7dbebbe
10 changed files with 1410 additions and 9 deletions
--- a/python/sglang/srt/disaggregation/utils.py
+++ b/python/sglang/srt/disaggregation/utils.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from collections import deque
+from enum import Enum
+from typing import List
+
+import torch
+import torch.distributed as dist
+
+
+class DisaggregationMode(Enum):
+    """Enumerates the disaggregation modes, each backed by a lowercase string value.
+
+    NOTE(review): the meaning of each member is inferred from its name only;
+    confirm against the code that consumes this enum.
+    """
+
+    # Presumably "disaggregation disabled" -- TODO confirm with callers.
+    NULL = "null"
+    # Presumably marks an instance that handles the prefill phase -- TODO confirm.
+    PREFILL = "prefill"
+    # Presumably marks an instance that handles the decode phase -- TODO confirm.
+    DECODE = "decode"
+
+
+def poll_and_all_reduce(pollers, gloo_group):
+    """Poll every poller locally, then MIN-reduce the results across ranks.
+
+    Args:
+        pollers: Iterable of objects exposing a ``poll()`` method whose result
+            can be converted with ``int()``. Values are stored as ``uint8``
+            (assumes they fit in 0..255 -- TODO confirm against the pollers).
+        gloo_group: Process group handed to ``dist.all_reduce``. The tensor is
+            built on CPU; presumably this is a Gloo-backed group, as the name
+            suggests.
+
+    Returns:
+        A list of ints, one per poller and in the same order, holding the
+        element-wise minimum of that poller's value over all ranks in the
+        group.
+    """
+    statuses = torch.tensor(
+        [int(p.poll()) for p in pollers],
+        dtype=torch.uint8,
+        device="cpu",
+    )
+    # In-place collective: afterwards each entry is the smallest value any
+    # rank reported for that position.
+    dist.all_reduce(statuses, op=dist.ReduceOp.MIN, group=gloo_group)
+    return statuses.tolist()
+
+
+class ReqToMetadataIdxAllocator:
+    """A memory pool that maps a request to its first output token location.
+
+    The pool hands out integer slot indices in the range ``[0, size)``.
+    Free indices are kept in a FIFO queue: ``alloc`` takes the oldest free
+    index from the left, and ``free`` puts a released index at the right, so
+    a just-released slot is reused only after every other free slot.
+    """
+
+    def __init__(
+        self,
+        size: int,
+    ) -> None:
+        """Create a pool in which all ``size`` slot indices start out free."""
+        self.size = size
+        # deque gives O(1) pops from the left and appends on the right.
+        self.free_slots = deque(range(size))
+
+    def available_size(self) -> int:
+        """Return how many slot indices are currently free."""
+        return len(self.free_slots)
+
+    def alloc(self) -> int | None:
+        """Take one free slot index out of the pool.
+
+        Returns:
+            The oldest free index, or ``None`` when the pool is exhausted.
+            Note that ``0`` is a valid index, so callers must compare the
+            result against ``None`` rather than test its truthiness.
+        """
+        if not self.free_slots:
+            return None
+
+        return self.free_slots.popleft()
+
+    def free(self, free_index: int) -> None:
+        """Return ``free_index`` to the pool.
+
+        No validation is performed: the caller must pass an index previously
+        obtained from ``alloc`` and must not release the same index twice.
+        """
+        self.free_slots.append(free_index)