[PD] Release initial code (#4654)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
Co-authored-by: Ying1123 <sqy1415@gmail.com>
Co-authored-by: merrymercy <lianminzheng@gmail.com>
Co-authored-by: makro
Co-authored-by: dhou-xai
This commit is contained in:
Byron Hsu
2025-03-21 14:47:47 -07:00
committed by GitHub
parent 417fc72f6f
commit c7c7dbebbe
10 changed files with 1410 additions and 9 deletions

View File

@@ -42,6 +42,8 @@ import triton.language as tl
from sglang.global_config import global_config
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.constrained.base_grammar_backend import BaseGrammarObject
from sglang.srt.disaggregation.conn import KVSender
from sglang.srt.disaggregation.decode import ScheduleBatchDisaggregationDecodeMixin
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
from sglang.srt.mem_cache.chunk_cache import ChunkCache
from sglang.srt.mem_cache.memory_pool import ReqToTokenPool, TokenToKVPoolAllocator
@@ -396,6 +398,24 @@ class Req:
self.spec_verify_ct = 0
self.lora_path = lora_path
# For disaggregation
self.bootstrap_host: str = "0.0.0.0"
self.bootstrap_room: Optional[int] = None
self.disagg_kv_sender: Optional[KVSender] = None
# used for warmup because we don't have a pair yet when init
self.skip_kv_transfer: bool = False
# the start index of the sent kv cache
# We want to send it chunk by chunk for chunked prefill.
# After every chunk forward, we do the following:
# kv_send(req.input_ids[req.start_send_idx:len(req.fill_ids)])
# start_send_idx = len(req.fill_ids)
self.start_send_idx: int = 0
self.metadata_buffer_index: int = -1
# The first output_id transferred from prefill instance.
self.transferred_output_id: Optional[int] = None
@property
def seqlen(self):
return len(self.origin_input_ids) + len(self.output_ids)
@@ -531,7 +551,7 @@ bid = 0
@dataclasses.dataclass
class ScheduleBatch:
class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
"""Store all information of a batch on the scheduler."""
# Request, memory pool, and cache