[PD] Support KV transfer with mooncake (#4880)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
Co-authored-by: Shangming Cai <caishangming@linux.alibaba.com>
Co-authored-by: Xuchun Shang <xuchun.shang@linux.alibaba.com>
Co-authored-by: shangmingc <csmthu@gmail.com>
This commit is contained in:
Teng Ma
2025-04-10 14:23:23 +08:00
committed by GitHub
parent f730362ee2
commit 4c31ae9f6d
8 changed files with 571 additions and 30 deletions

View File

@@ -26,6 +26,7 @@ import torch
from sglang.srt.disaggregation.conn import KVArgs, KVManager, KVPoll, KVSender
from sglang.srt.disaggregation.utils import (
DisaggregationMode,
ReqToMetadataIdxAllocator,
poll_and_all_reduce,
)
@@ -95,7 +96,7 @@ class PrefillBootstrapQueue:
metadata_buffer[0].nbytes for metadata_buffer in self.metadata_buffers
]
kv_args.ib_device = "mock-ib-device"
-kv_manager = KVManager(kv_args)
+kv_manager = KVManager(kv_args, DisaggregationMode("prefill"))
return kv_manager
def add(self, req: Req) -> None: