[HiCache] Clear kvcache in storage backend with fastAPI (#9750)
Co-authored-by: hzh0425 <hzh0425@apache.org>
This commit is contained in:
@@ -480,6 +480,16 @@ async def flush_cache():
|
||||
)
|
||||
|
||||
|
||||
@app.api_route("/clear_hicache_storage_backend", methods=["GET", "POST"])
async def clear_hicache_storage_backend():
    """Clear the hierarchical cache storage backend.

    Forwards the request through the tokenizer manager and reports the
    outcome: HTTP 200 when the returned result has ``success`` set,
    HTTP 400 (``HTTPStatus.BAD_REQUEST``) otherwise.
    """
    ret = await _global_state.tokenizer_manager.clear_hicache_storage()
    if ret.success:
        return Response(
            content="Hierarchical cache storage backend cleared.\n",
            status_code=200,
        )
    # Do not tell the client the backend was cleared when the request failed.
    return Response(
        content=(
            "Failed to clear hierarchical cache storage backend.\n"
            "Please check backend logs for more details.\n"
        ),
        status_code=HTTPStatus.BAD_REQUEST,
    )
|
||||
|
||||
|
||||
@app.api_route("/start_profile", methods=["GET", "POST"])
|
||||
async def start_profile_async(obj: Optional[ProfileReqInput] = None):
|
||||
"""Start profiling."""
|
||||
|
||||
@@ -814,6 +814,16 @@ class BatchEmbeddingOut:
|
||||
cached_tokens: List[int]
|
||||
|
||||
|
||||
@dataclass
class ClearHiCacheReqInput:
    """Request to clear the hierarchical cache storage; carries no fields."""
|
||||
|
||||
|
||||
@dataclass
class ClearHiCacheReqOutput:
    """Reply to a ``ClearHiCacheReqInput`` request."""

    # Whether the clear request succeeded, as reported by the handler.
    success: bool
|
||||
|
||||
|
||||
@dataclass
class FlushCacheReqInput:
    """Request to flush the cache; carries no fields."""
|
||||
|
||||
@@ -69,6 +69,8 @@ from sglang.srt.managers.io_struct import (
|
||||
AbortReq,
|
||||
BatchTokenizedEmbeddingReqInput,
|
||||
BatchTokenizedGenerateReqInput,
|
||||
ClearHiCacheReqInput,
|
||||
ClearHiCacheReqOutput,
|
||||
CloseSessionReqInput,
|
||||
ExpertDistributionReq,
|
||||
ExpertDistributionReqOutput,
|
||||
@@ -515,6 +517,7 @@ class Scheduler(
|
||||
(BatchTokenizedGenerateReqInput, self.handle_batch_generate_request),
|
||||
(BatchTokenizedEmbeddingReqInput, self.handle_batch_embedding_request),
|
||||
(FlushCacheReqInput, self.flush_cache_wrapped),
|
||||
(ClearHiCacheReqInput, self.clear_hicache_storage_wrapped),
|
||||
(AbortReq, self.abort_request),
|
||||
(OpenSessionReqInput, self.open_session),
|
||||
(CloseSessionReqInput, self.close_session),
|
||||
@@ -2207,6 +2210,16 @@ class Scheduler(
|
||||
success = self.flush_cache()
|
||||
return FlushCacheReqOutput(success=success)
|
||||
|
||||
def clear_hicache_storage_wrapped(self, recv_req: ClearHiCacheReqInput):
    """Handle a request to clear the hierarchical cache storage backend.

    Args:
        recv_req: The incoming request; it is not inspected.

    Returns:
        A ``ClearHiCacheReqOutput`` whose ``success`` is True only when
        hierarchical caching is enabled and the tree cache reports that
        the storage backend was cleared.
    """
    if not self.enable_hierarchical_cache:
        logger.warning("Hierarchical cache is not enabled.")
        return ClearHiCacheReqOutput(success=False)

    # Propagate the tree cache's verdict instead of assuming success: it
    # returns a falsy value when there was no storage backend to clear.
    if_success = bool(self.tree_cache.clear_storage_backend())
    if if_success:
        logger.info("Hierarchical cache cleared successfully!")
    else:
        logger.warning("Hierarchical cache storage backend was not cleared.")
    return ClearHiCacheReqOutput(success=if_success)
|
||||
|
||||
def flush_cache(self):
|
||||
"""Flush the memory pool and cache."""
|
||||
if (
|
||||
|
||||
@@ -73,6 +73,8 @@ from sglang.srt.managers.io_struct import (
|
||||
BatchTokenIDOut,
|
||||
BatchTokenizedEmbeddingReqInput,
|
||||
BatchTokenizedGenerateReqInput,
|
||||
ClearHiCacheReqInput,
|
||||
ClearHiCacheReqOutput,
|
||||
CloseSessionReqInput,
|
||||
ConfigureLoggingReq,
|
||||
EmbeddingReqInput,
|
||||
@@ -386,6 +388,9 @@ class TokenizerManager:
|
||||
self.flush_cache_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.clear_hicache_storage_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
self.profile_communicator = _Communicator(
|
||||
self.send_to_scheduler, server_args.dp_size
|
||||
)
|
||||
@@ -447,6 +452,10 @@ class TokenizerManager:
|
||||
SlowDownReqOutput,
|
||||
self.slow_down_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
ClearHiCacheReqOutput,
|
||||
self.clear_hicache_storage_communicator.handle_recv,
|
||||
),
|
||||
(
|
||||
FlushCacheReqOutput,
|
||||
self.flush_cache_communicator.handle_recv,
|
||||
@@ -988,6 +997,13 @@ class TokenizerManager:
|
||||
async def flush_cache(self) -> FlushCacheReqOutput:
    """Send a flush-cache request and return the first reply received."""
    replies = await self.flush_cache_communicator(FlushCacheReqInput())
    return replies[0]
|
||||
|
||||
async def clear_hicache_storage(self) -> ClearHiCacheReqOutput:
    """Clear the hierarchical cache storage.

    Returns the first reply collected by the communicator.
    """
    # HiCacheStorage clearing is handled by the scheduler; just forward it.
    request = ClearHiCacheReqInput()
    replies = await self.clear_hicache_storage_communicator(request)
    return replies[0]
|
||||
|
||||
def abort_request(self, rid: str = "", abort_all: bool = False):
|
||||
if not abort_all and rid not in self.rid_to_state:
|
||||
return
|
||||
|
||||
@@ -102,6 +102,20 @@ class HiCacheStorage(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
def delete(self, key: str) -> bool:
    """Delete the entry associated with the given key.

    Abstract hook: this base definition does nothing and is meant to be
    overridden by concrete storage backends.
    """
|
||||
|
||||
@abstractmethod
def clear(self) -> bool:
    """Clear all entries in the storage.

    Abstract hook: this base definition does nothing and is meant to be
    overridden by concrete storage backends.
    """
|
||||
|
||||
def batch_exists(self, keys: List[str]) -> int:
|
||||
"""
|
||||
Check if the keys exist in the storage.
|
||||
@@ -214,12 +228,14 @@ class HiCacheFile(HiCacheStorage):
|
||||
logger.warning(f"Key {key} does not exist. Cannot delete.")
|
||||
return
|
||||
|
||||
def clear(self) -> bool:
    """Remove every regular file directly under ``self.file_path``.

    Entries that are not regular files (for example subdirectories) are
    left in place.

    Returns:
        True when the directory was processed without error, False if any
        exception occurred (the error is logged rather than raised).
    """
    try:
        for filename in os.listdir(self.file_path):
            file_path = os.path.join(self.file_path, filename)
            if os.path.isfile(file_path):
                os.remove(file_path)
        logger.info("Cleared all entries in HiCacheFile storage.")
        return True
    except Exception as e:
        logger.error(f"Failed to clear HiCacheFile storage: {e}")
        return False
|
||||
|
||||
@@ -125,6 +125,15 @@ class HiRadixCache(RadixCache):
|
||||
height += 1
|
||||
return height
|
||||
|
||||
def clear_storage_backend(self):
    """Clear the storage backend attached to the cache controller.

    Returns:
        True when storage is enabled and the backend did not report a
        failure; False when storage is disabled or the backend's
        ``clear()`` returned False.
    """
    if not self.enable_storage:
        logger.warning("Hierarchical cache storage backend is not enabled.")
        return False

    # A backend signals failure by returning False. A backend that returns
    # nothing (None) is treated as successful so it keeps working unchanged.
    if self.cache_controller.storage_backend.clear() is False:
        logger.error("Failed to clear hierarchical cache storage backend.")
        return False

    logger.info("Hierarchical cache storage backend cleared successfully!")
    return True
|
||||
|
||||
def write_backup(self, node: TreeNode, write_back=False):
|
||||
host_indices = self.cache_controller.write(
|
||||
device_indices=node.value,
|
||||
|
||||
@@ -393,8 +393,14 @@ class HiCacheHF3FS(HiCacheStorage):
|
||||
|
||||
return len(keys)
|
||||
|
||||
def clear(self) -> bool:
    """Clear this rank's entries through the metadata client.

    Returns:
        True when ``metadata_client.clear`` completed without raising,
        False otherwise (the error is logged rather than raised).
    """
    try:
        self.metadata_client.clear(self.rank)
        logger.info(f"Cleared HiCacheHF3FS for rank {self.rank}")
        return True
    except Exception as e:
        logger.error(f"Failed to clear HiCacheHF3FS: {e}")
        return False
|
||||
|
||||
def close(self) -> None:
|
||||
try:
|
||||
|
||||
@@ -254,7 +254,7 @@ class MooncakeStore(HiCacheStorage):
|
||||
pass
|
||||
|
||||
def clear(self) -> bool:
    """Remove all entries from the underlying store.

    Returns:
        True when ``self.store.remove_all()`` completed without raising,
        False otherwise (the error is logged rather than raised).
    """
    # Imported locally so this method does not depend on a module-level
    # logger being defined in this file.
    import logging

    log = logging.getLogger(__name__)
    try:
        # NOTE(review): the return value of remove_all() is not inspected;
        # confirm whether it can signal failure without raising.
        self.store.remove_all()
    except Exception as e:
        log.error(f"Failed to clear MooncakeStore: {e}")
        return False
    log.info("Cleared all entries in MooncakeStore.")
    return True
|
||||
|
||||
def _put_batch_zero_copy_impl(
|
||||
self, key_strs: List[str], buffer_ptrs: List[int], buffer_sizes: List[int]
|
||||
|
||||
Reference in New Issue
Block a user