diff --git a/python/sglang/srt/mem_cache/hiradix_cache.py b/python/sglang/srt/mem_cache/hiradix_cache.py index f2ed1aea9..75ff08fd6 100644 --- a/python/sglang/srt/mem_cache/hiradix_cache.py +++ b/python/sglang/srt/mem_cache/hiradix_cache.py @@ -48,9 +48,9 @@ class HiRadixCache(RadixCache): if hicache_io_backend == "direct": if hicache_mem_layout == "page_first": - hicache_mem_layout = "layer_first" + hicache_mem_layout = "page_first_direct" logger.warning( - "Page first layout is not supported with direct IO backend, switching to layer first layout" + "Page first layout is not supported with direct IO backend, switching to page first direct layout" ) self.kv_cache = token_to_kv_pool_allocator.get_kvcache() diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py index 2ea016285..0b9db07f7 100644 --- a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +++ b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py @@ -190,9 +190,10 @@ class MooncakeStore(HiCacheStorage): def register_mem_pool_host(self, mem_pool_host: HostKVCache): super().register_mem_pool_host(mem_pool_host) - assert ( - self.mem_pool_host.layout == "page_first" - ), "mooncake store storage backend only support page first layout" + assert self.mem_pool_host.layout in [ + "page_first", + "page_first_direct", + ], "mooncake store storage backend only support page first or page first direct layout" buffer = self.mem_pool_host.kv_buffer try: buffer_ptr = buffer.data_ptr() diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 1535d3a2a..f076b8a1f 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -930,8 +930,15 @@ class ServerArgs: def _handle_hicache(self): if self.hicache_storage_backend == "mooncake": - self.hicache_io_backend = "kernel" - self.hicache_mem_layout = "page_first" + if self.hicache_mem_layout 
== "layer_first": + if self.hicache_io_backend == "direct": + self.hicache_mem_layout = "page_first_direct" + elif self.hicache_io_backend == "kernel": + self.hicache_mem_layout = "page_first" + logger.warning( + f"Mooncake storage backend does not support layer_first layout, " + f"switching to {self.hicache_mem_layout} layout for {self.hicache_io_backend} io backend" + ) if self.hicache_mem_layout == "page_first_direct": if self.hicache_io_backend != "direct": diff --git a/test/srt/hicache/test_hicache_storage_mooncake_backend.py b/test/srt/hicache/test_hicache_storage_mooncake_backend.py index 9e750397a..2f3f94bce 100644 --- a/test/srt/hicache/test_hicache_storage_mooncake_backend.py +++ b/test/srt/hicache/test_hicache_storage_mooncake_backend.py @@ -236,6 +236,21 @@ class TestMooncakeBackendPageFirstLayout( """Get additional server arguments specific to configuration - override in subclasses""" server_args, env_vars = super()._get_additional_server_args_and_env() server_args["--hicache-mem-layout"] = "page_first" + server_args["--hicache-io-backend"] = "kernel" + return server_args, env_vars + + +class TestMooncakeBackendPageFirstDirectLayout( + HiCacheStorageMooncakeBackendBaseMixin, CustomTestCase +): + """Page first direct layout tests for HiCache-Mooncake backend""" + + @classmethod + def _get_additional_server_args_and_env(cls): + """Get additional server arguments specific to configuration - override in subclasses""" + server_args, env_vars = super()._get_additional_server_args_and_env() + server_args["--hicache-mem-layout"] = "page_first_direct" + server_args["--hicache-io-backend"] = "direct" return server_args, env_vars