[HiCache] Clear kvcache in storage backend with fastAPI (#9750)
Co-authored-by: hzh0425 <hzh0425@apache.org>
This commit is contained in:
@@ -73,6 +73,8 @@ from sglang.srt.managers.io_struct import (
|
||||
BatchTokenIDOut,
|
||||
BatchTokenizedEmbeddingReqInput,
|
||||
BatchTokenizedGenerateReqInput,
|
||||
ClearHiCacheReqInput,
|
||||
ClearHiCacheReqOutput,
|
||||
CloseSessionReqInput,
|
||||
ConfigureLoggingReq,
|
||||
EmbeddingReqInput,
|
||||
@@ -386,6 +388,9 @@ class TokenizerManager:
|
||||
self.flush_cache_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.clear_hicache_storage_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.profile_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
@@ -447,6 +452,10 @@ class TokenizerManager:
|
||||
SlowDownReqOutput,
|
||||
self.slow_down_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
ClearHiCacheReqOutput,
|
||||
self.clear_hicache_storage_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
FlushCacheReqOutput,
|
||||
self.flush_cache_communicator.handle_recv,
|
||||
@@ -988,6 +997,13 @@ class TokenizerManager:
|
||||
async def flush_cache(self) -> FlushCacheReqOutput:
    """Send a ``FlushCacheReqInput`` through the flush-cache communicator.

    Returns:
        The first element of the awaited communicator result.
    """
    # NOTE(review): the communicator is constructed with server_args.dp_size,
    # so the awaited result presumably holds one reply per data-parallel
    # rank; only index 0 is handed back to the caller -- confirm intended.
    replies = await self.flush_cache_communicator(FlushCacheReqInput())
    return replies[0]
|
||||
|
||||
async def clear_hicache_storage(self) -> ClearHiCacheReqOutput:
    """Request that the hierarchical cache (HiCache) storage be cleared.

    The tokenizer manager does not clear anything itself: it sends a
    ``ClearHiCacheReqInput`` through ``clear_hicache_storage_communicator``
    and waits for the replies.

    Returns:
        The first element of the awaited communicator result.
    """
    # NOTE(review): the communicator is constructed with server_args.dp_size,
    # so the awaited result presumably holds one reply per data-parallel
    # rank; only index 0 is handed back to the caller -- confirm intended.
    request = ClearHiCacheReqInput()
    replies = await self.clear_hicache_storage_communicator(request)
    return replies[0]
|
||||
|
||||
def abort_request(self, rid: str = "", abort_all: bool = False):
|
||||
if not abort_all and rid not in self.rid_to_state:
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user