[HiCacheStorage] mooncake store support page_first_direct layout (#10591)
This commit is contained in:
@@ -48,9 +48,9 @@ class HiRadixCache(RadixCache):
|
||||
|
||||
if hicache_io_backend == "direct":
|
||||
if hicache_mem_layout == "page_first":
|
||||
- hicache_mem_layout = "layer_first"
|
||||
+ hicache_mem_layout = "page_first_direct"
|
||||
logger.warning(
|
||||
- "Page first layout is not supported with direct IO backend, switching to layer first layout"
|
||||
+ "Page first layout is not supported with direct IO backend, switching to page first direct layout"
|
||||
)
|
||||
|
||||
self.kv_cache = token_to_kv_pool_allocator.get_kvcache()
|
||||
|
||||
@@ -190,9 +190,10 @@ class MooncakeStore(HiCacheStorage):
|
||||
|
||||
def register_mem_pool_host(self, mem_pool_host: HostKVCache):
|
||||
super().register_mem_pool_host(mem_pool_host)
|
||||
- assert (
|
||||
- self.mem_pool_host.layout == "page_first"
|
||||
- ), "mooncake store storage backend only support page first layout"
|
||||
+ assert self.mem_pool_host.layout in [
|
||||
+ "page_first",
|
||||
+ "page_first_direct",
|
||||
+ ], "mooncake store storage backend only support page first or page first direct layout"
|
||||
buffer = self.mem_pool_host.kv_buffer
|
||||
try:
|
||||
buffer_ptr = buffer.data_ptr()
|
||||
|
||||
@@ -930,8 +930,15 @@ class ServerArgs:
|
||||
|
||||
def _handle_hicache(self):
|
||||
if self.hicache_storage_backend == "mooncake":
|
||||
- self.hicache_io_backend = "kernel"
|
||||
- self.hicache_mem_layout = "page_first"
|
||||
+ if self.hicache_mem_layout == "layer_first":
|
||||
+ if self.hicache_io_backend == "direct":
|
||||
+ self.hicache_mem_layout = "page_first_direct"
|
||||
+ elif self.hicache_io_backend == "kernel":
|
||||
+ self.hicache_mem_layout = "page_first"
|
||||
+ logger.warning(
|
||||
+ f"Mooncake storage backend does not support layer_first layout, "
|
||||
+ f"switching to {self.hicache_mem_layout} layout for {self.hicache_io_backend} io backend"
|
||||
+ )
|
||||
|
||||
if self.hicache_mem_layout == "page_first_direct":
|
||||
if self.hicache_io_backend != "direct":
|
||||
|
||||
Reference in New Issue
Block a user