Unit test for Hierarchical Caching (#4486)

This commit is contained in:
Zhiqiang Xie
2025-03-17 17:45:00 -07:00
committed by GitHub
parent 9b81f9bd34
commit a98290aea3
7 changed files with 65 additions and 5 deletions

View File

@@ -29,6 +29,7 @@ class HiRadixCache(RadixCache):
 token_to_kv_pool_allocator: TokenToKVPoolAllocator,
 tp_cache_group: torch.distributed.ProcessGroup,
 page_size: int,
+hicache_ratio: float,
 ):
 if page_size != 1:
 raise ValueError(
@@ -36,9 +37,13 @@ class HiRadixCache(RadixCache):
 )
 self.kv_cache = token_to_kv_pool_allocator.get_kvcache()
 if isinstance(self.kv_cache, MHATokenToKVPool):
-self.token_to_kv_pool_host = MHATokenToKVPoolHost(self.kv_cache)
+self.token_to_kv_pool_host = MHATokenToKVPoolHost(
+self.kv_cache, hicache_ratio
+)
 elif isinstance(self.kv_cache, MLATokenToKVPool):
-self.token_to_kv_pool_host = MLATokenToKVPoolHost(self.kv_cache)
+self.token_to_kv_pool_host = MLATokenToKVPoolHost(
+self.kv_cache, hicache_ratio
+)
 else:
 raise ValueError(f"Only MHA and MLA supports swap kv_cache to host.")