Interface change for kvcache io to support page first layout (#8318)

This commit is contained in:
Zhiqiang Xie
2025-07-31 20:37:49 -07:00
committed by GitHub
parent 9305ea6c2d
commit dd7ca00601
6 changed files with 371 additions and 171 deletions

View File

@@ -35,16 +35,33 @@ class HiRadixCache(RadixCache):
hicache_size: int,
hicache_write_policy: str,
hicache_io_backend: str,
hicache_mem_layout: str,
hicache_storage_backend: Optional[str] = None,
):
if hicache_io_backend == "direct":
if hicache_mem_layout == "page_first":
hicache_mem_layout = "layer_first"
logger.warning(
"Page first layout is not supported with direct IO backend, switching to layer first layout"
)
self.kv_cache = token_to_kv_pool_allocator.get_kvcache()
if isinstance(self.kv_cache, MHATokenToKVPool):
self.token_to_kv_pool_host = MHATokenToKVPoolHost(
self.kv_cache, hicache_ratio, hicache_size, page_size
self.kv_cache,
hicache_ratio,
hicache_size,
page_size,
hicache_mem_layout,
)
elif isinstance(self.kv_cache, MLATokenToKVPool):
self.token_to_kv_pool_host = MLATokenToKVPoolHost(
self.kv_cache, hicache_ratio, hicache_size, page_size
self.kv_cache,
hicache_ratio,
hicache_size,
page_size,
hicache_mem_layout,
)
else:
raise ValueError(f"HiRadixCache only supports MHA and MLA yet")