[Feature] LMCache Connector Integration (#9741)

Signed-off-by: Oasis-Git <ayw.sirius19@gmail.com>
Signed-off-by: YuhanLiu11 <yliu738@wisc.edu>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
Yuwei An
2025-09-06 20:14:55 -07:00
committed by GitHub
parent cb3918a091
commit 9a7ced4e4d
7 changed files with 478 additions and 3 deletions

View File

@@ -656,6 +656,21 @@ class Scheduler(
page_size=self.page_size,
disable=server_args.disable_radix_cache,
)
elif server_args.enable_lmcache:
from sglang.srt.mem_cache.storage.lmcache.lmc_radix_cache import (
LMCRadixCache,
)
self.tree_cache = LMCRadixCache(
req_to_token_pool=self.req_to_token_pool,
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
page_size=self.page_size,
disable=server_args.disable_radix_cache,
model_config=self.model_config,
tp_size=self.tp_size,
rank=self.tp_rank,
tp_group=self.tp_group,
)
else:
self.tree_cache = RadixCache(
req_to_token_pool=self.req_to_token_pool,
@@ -1411,9 +1426,11 @@ class Scheduler(
_, _, available_size, evictable_size = self._get_token_info()
protected_size = self.tree_cache.protected_size()
memory_leak = (available_size + evictable_size) != (
# self.max_total_num_tokens
# if not self.enable_hierarchical_cache
# else self.max_total_num_tokens - protected_size
self.max_total_num_tokens
if not self.enable_hierarchical_cache
else self.max_total_num_tokens - protected_size
- protected_size
)
token_msg = f"{self.max_total_num_tokens=}, {available_size=}, {evictable_size=}, {protected_size=}\n"