[HiCache] Clear kvcache in storage backend with fastAPI (#9750)

Co-authored-by: hzh0425 <hzh0425@apache.org>
This commit is contained in:
Teng Ma
2025-08-31 17:41:44 +08:00
committed by GitHub
parent 9a0d0b754d
commit f05c68733e
8 changed files with 84 additions and 4 deletions

View File

@@ -814,6 +814,16 @@ class BatchEmbeddingOut:
cached_tokens: List[int]
@dataclass
class ClearHiCacheReqInput:
    """Field-less request message asking for the HiCache storage to be cleared."""
@dataclass
class ClearHiCacheReqOutput:
    """Reply message reporting the outcome of a HiCache clear request."""

    # True when the clear operation was carried out, False otherwise.
    success: bool
@dataclass
class FlushCacheReqInput:
pass

View File

@@ -69,6 +69,8 @@ from sglang.srt.managers.io_struct import (
AbortReq,
BatchTokenizedEmbeddingReqInput,
BatchTokenizedGenerateReqInput,
ClearHiCacheReqInput,
ClearHiCacheReqOutput,
CloseSessionReqInput,
ExpertDistributionReq,
ExpertDistributionReqOutput,
@@ -515,6 +517,7 @@ class Scheduler(
(BatchTokenizedGenerateReqInput, self.handle_batch_generate_request),
(BatchTokenizedEmbeddingReqInput, self.handle_batch_embedding_request),
(FlushCacheReqInput, self.flush_cache_wrapped),
(ClearHiCacheReqInput, self.clear_hicache_storage_wrapped),
(AbortReq, self.abort_request),
(OpenSessionReqInput, self.open_session),
(CloseSessionReqInput, self.close_session),
@@ -2207,6 +2210,16 @@ class Scheduler(
success = self.flush_cache()
return FlushCacheReqOutput(success=success)
def clear_hicache_storage_wrapped(self, recv_req: ClearHiCacheReqInput):
    """Clear the hierarchical cache's storage backend in response to a request.

    Args:
        recv_req: The incoming clear request. Its contents are not inspected.

    Returns:
        A ``ClearHiCacheReqOutput`` whose ``success`` is ``True`` when the
        hierarchical cache is enabled and ``clear_storage_backend()`` was
        invoked, and ``False`` when the hierarchical cache is disabled.
    """
    if not self.enable_hierarchical_cache:
        # Log through the module-level ``logger`` (as the success path does)
        # rather than the root ``logging`` module, so both messages honor the
        # same logger configuration.
        logger.warning("Hierarchical cache is not enabled.")
        return ClearHiCacheReqOutput(success=False)

    # NOTE(review): any return value of clear_storage_backend() is ignored and
    # success is reported unconditionally -- confirm whether the backend can
    # signal a failed clear that should be propagated here.
    self.tree_cache.clear_storage_backend()
    logger.info("Hierarchical cache cleared successfully!")
    return ClearHiCacheReqOutput(success=True)
def flush_cache(self):
"""Flush the memory pool and cache."""
if (

View File

@@ -73,6 +73,8 @@ from sglang.srt.managers.io_struct import (
BatchTokenIDOut,
BatchTokenizedEmbeddingReqInput,
BatchTokenizedGenerateReqInput,
ClearHiCacheReqInput,
ClearHiCacheReqOutput,
CloseSessionReqInput,
ConfigureLoggingReq,
EmbeddingReqInput,
@@ -386,6 +388,9 @@ class TokenizerManager:
self.flush_cache_communicator = _Communicator(
self.send_to_scheduler, server_args.dp_size
)
self.clear_hicache_storage_communicator = _Communicator(
self.send_to_scheduler, server_args.dp_size
)
self.profile_communicator = _Communicator(
self.send_to_scheduler, server_args.dp_size
)
@@ -447,6 +452,10 @@ class TokenizerManager:
SlowDownReqOutput,
self.slow_down_communicator.handle_recv,
),
(
ClearHiCacheReqOutput,
self.clear_hicache_storage_communicator.handle_recv,
),
(
FlushCacheReqOutput,
self.flush_cache_communicator.handle_recv,
@@ -988,6 +997,13 @@ class TokenizerManager:
async def flush_cache(self) -> FlushCacheReqOutput:
    """Send a ``FlushCacheReqInput`` via the flush-cache communicator.

    Returns:
        The first element of the communicator's awaited result.
    """
    replies = await self.flush_cache_communicator(FlushCacheReqInput())
    return replies[0]
async def clear_hicache_storage(self) -> ClearHiCacheReqOutput:
    """Ask the scheduler to clear the hierarchical cache storage.

    Returns:
        The first element of the communicator's awaited result.
    """
    # The actual clearing is performed on the scheduler side; this method only
    # forwards the request and hands back the first reply.
    request = ClearHiCacheReqInput()
    replies = await self.clear_hicache_storage_communicator(request)
    return replies[0]
def abort_request(self, rid: str = "", abort_all: bool = False):
if not abort_all and rid not in self.rid_to_state:
return