Kernels for efficient KV cache IO (#7313)

This commit is contained in:
Zhiqiang Xie
2025-07-06 22:53:36 -07:00
committed by GitHub
parent 253454de9b
commit 2fc824b84c
7 changed files with 184 additions and 371 deletions

View File

@@ -34,6 +34,7 @@ class HiRadixCache(RadixCache):
hicache_ratio: float,
hicache_size: int,
hicache_write_policy: str,
hicache_io_backend: str,
):
self.kv_cache = token_to_kv_pool_allocator.get_kvcache()
if isinstance(self.kv_cache, MHATokenToKVPool):
@@ -56,6 +57,7 @@ class HiRadixCache(RadixCache):
page_size,
load_cache_event=self.load_cache_event,
write_policy=hicache_write_policy,
io_backend=hicache_io_backend,
)
# record the nodes with ongoing write through