[HiCache] Clear kvcache in storage backend with fastAPI (#9750)

Co-authored-by: hzh0425 <hzh0425@apache.org>
This commit is contained in:
Teng Ma
2025-08-31 17:41:44 +08:00
committed by GitHub
parent 9a0d0b754d
commit f05c68733e
8 changed files with 84 additions and 4 deletions

View File

@@ -73,6 +73,8 @@ from sglang.srt.managers.io_struct import (
BatchTokenIDOut,
BatchTokenizedEmbeddingReqInput,
BatchTokenizedGenerateReqInput,
ClearHiCacheReqInput,
ClearHiCacheReqOutput,
CloseSessionReqInput,
ConfigureLoggingReq,
EmbeddingReqInput,
@@ -386,6 +388,9 @@ class TokenizerManager:
self.flush_cache_communicator = _Communicator(
self.send_to_scheduler, server_args.dp_size
)
self.clear_hicache_storage_communicator = _Communicator(
self.send_to_scheduler, server_args.dp_size
)
self.profile_communicator = _Communicator(
self.send_to_scheduler, server_args.dp_size
)
@@ -447,6 +452,10 @@ class TokenizerManager:
SlowDownReqOutput,
self.slow_down_communicator.handle_recv,
),
(
ClearHiCacheReqOutput,
self.clear_hicache_storage_communicator.handle_recv,
),
(
FlushCacheReqOutput,
self.flush_cache_communicator.handle_recv,
@@ -988,6 +997,13 @@ class TokenizerManager:
async def flush_cache(self) -> FlushCacheReqOutput:
    """Issue a flush-cache request and return the first reply.

    A fresh ``FlushCacheReqInput`` is passed to
    ``self.flush_cache_communicator``; the awaited result is indexed at
    position 0 and that element is returned.
    """
    # NOTE(review): presumably the communicator yields one reply per
    # data-parallel rank and only the first is surfaced — confirm.
    replies = await self.flush_cache_communicator(FlushCacheReqInput())
    return replies[0]
async def clear_hicache_storage(self) -> ClearHiCacheReqOutput:
    """Ask the scheduler to clear the hierarchical cache storage.

    A fresh ``ClearHiCacheReqInput`` is passed to
    ``self.clear_hicache_storage_communicator`` (constructed around
    ``self.send_to_scheduler``); the awaited result is indexed at
    position 0 and that element is returned.
    """
    # The tokenizer manager does no clearing itself; the request is handed
    # off through the communicator.
    # NOTE(review): presumably one reply arrives per data-parallel rank and
    # only the first is surfaced — confirm.
    request = ClearHiCacheReqInput()
    replies = await self.clear_hicache_storage_communicator(request)
    return replies[0]
def abort_request(self, rid: str = "", abort_all: bool = False):
if not abort_all and rid not in self.rid_to_state:
return