[HiCache] Clear KV cache in storage backend via a FastAPI endpoint (#9750)
Co-authored-by: hzh0425 <hzh0425@apache.org>
This commit is contained in:
@@ -814,6 +814,16 @@ class BatchEmbeddingOut:
|
||||
cached_tokens: List[int]
|
||||
|
||||
|
||||
@dataclass
class ClearHiCacheReqInput:
    """Field-less request that triggers clearing of the HiCache storage backend."""
|
||||
|
||||
|
||||
@dataclass
class ClearHiCacheReqOutput:
    """Reply produced for a ``ClearHiCacheReqInput``."""

    # True when the clear was carried out; False when it was not performed.
    success: bool
|
||||
|
||||
|
||||
@dataclass
class FlushCacheReqInput:
    """Field-less request that triggers a cache flush."""
|
||||
|
||||
@@ -69,6 +69,8 @@ from sglang.srt.managers.io_struct import (
|
||||
AbortReq,
|
||||
BatchTokenizedEmbeddingReqInput,
|
||||
BatchTokenizedGenerateReqInput,
|
||||
ClearHiCacheReqInput,
|
||||
ClearHiCacheReqOutput,
|
||||
CloseSessionReqInput,
|
||||
ExpertDistributionReq,
|
||||
ExpertDistributionReqOutput,
|
||||
@@ -515,6 +517,7 @@ class Scheduler(
|
||||
(BatchTokenizedGenerateReqInput, self.handle_batch_generate_request),
|
||||
(BatchTokenizedEmbeddingReqInput, self.handle_batch_embedding_request),
|
||||
(FlushCacheReqInput, self.flush_cache_wrapped),
|
||||
(ClearHiCacheReqInput, self.clear_hicache_storage_wrapped),
|
||||
(AbortReq, self.abort_request),
|
||||
(OpenSessionReqInput, self.open_session),
|
||||
(CloseSessionReqInput, self.close_session),
|
||||
@@ -2207,6 +2210,16 @@ class Scheduler(
|
||||
success = self.flush_cache()
|
||||
return FlushCacheReqOutput(success=success)
|
||||
|
||||
def clear_hicache_storage_wrapped(self, recv_req: ClearHiCacheReqInput):
    """Clear the hierarchical cache's storage backend in response to a request.

    Args:
        recv_req: The incoming clear request. It carries no data and is not
            inspected here.

    Returns:
        A ``ClearHiCacheReqOutput`` whose ``success`` is True when hierarchical
        cache is enabled and the clear call was issued, and False when
        hierarchical cache is not enabled.
    """
    if not self.enable_hierarchical_cache:
        # Log through the module-level logger (as the success path does) rather
        # than the root ``logging`` module, so both messages share one logger.
        logger.warning("Hierarchical cache is not enabled.")
        return ClearHiCacheReqOutput(success=False)

    # NOTE(review): any value returned by clear_storage_backend() is ignored,
    # so success here only means the call returned without raising -- confirm
    # whether the backend reports failures through its return value.
    self.tree_cache.clear_storage_backend()
    logger.info("Hierarchical cache cleared successfully!")
    return ClearHiCacheReqOutput(success=True)
|
||||
|
||||
def flush_cache(self):
|
||||
"""Flush the memory pool and cache."""
|
||||
if (
|
||||
|
||||
@@ -73,6 +73,8 @@ from sglang.srt.managers.io_struct import (
|
||||
BatchTokenIDOut,
|
||||
BatchTokenizedEmbeddingReqInput,
|
||||
BatchTokenizedGenerateReqInput,
|
||||
ClearHiCacheReqInput,
|
||||
ClearHiCacheReqOutput,
|
||||
CloseSessionReqInput,
|
||||
ConfigureLoggingReq,
|
||||
EmbeddingReqInput,
|
||||
@@ -386,6 +388,9 @@ class TokenizerManager:
|
||||
self.flush_cache_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.clear_hicache_storage_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.profile_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
@@ -447,6 +452,10 @@ class TokenizerManager:
|
||||
SlowDownReqOutput,
|
||||
self.slow_down_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
ClearHiCacheReqOutput,
|
||||
self.clear_hicache_storage_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
FlushCacheReqOutput,
|
||||
self.flush_cache_communicator.handle_recv,
|
||||
@@ -988,6 +997,13 @@ class TokenizerManager:
|
||||
async def flush_cache(self) -> FlushCacheReqOutput:
    """Issue a flush-cache request and return the first reply received."""
    replies = await self.flush_cache_communicator(FlushCacheReqInput())
    return replies[0]
|
||||
|
||||
async def clear_hicache_storage(self) -> ClearHiCacheReqOutput:
    """Request that the hierarchical cache storage be cleared.

    The actual clearing is not done here; a ``ClearHiCacheReqInput`` is sent
    through the dedicated communicator and the first reply is returned.
    """
    replies = await self.clear_hicache_storage_communicator(ClearHiCacheReqInput())
    return replies[0]
|
||||
|
||||
def abort_request(self, rid: str = "", abort_all: bool = False):
|
||||
if not abort_all and rid not in self.rid_to_state:
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user