diff --git a/docs/advanced_features/hicache_best_practices.md b/docs/advanced_features/hicache_best_practices.md index 80a4850c8..92ed5180c 100644 --- a/docs/advanced_features/hicache_best_practices.md +++ b/docs/advanced_features/hicache_best_practices.md @@ -67,6 +67,7 @@ python3 -m sglang.launch_server \ --enable-hierarchical-cache \ --hicache-ratio 2 \ --hicache-size 0 \ + --hicache-mem-layout page_first_direct \ --hicache-io-backend direct \ --hicache-write-policy write_through \ --hicache-storage-backend hf3fs \ @@ -86,6 +87,7 @@ python3 -m sglang.launch_server \ --page-size 64 \ --hicache-ratio 2 \ --hicache-size 0 \ + --hicache-mem-layout page_first_direct \ --hicache-io-backend direct \ --hicache-write-policy write_through \ --hicache-storage-backend hf3fs \ @@ -115,7 +117,8 @@ python3 -m sglang.launch_server \ --enable-hierarchical-cache \ --hicache-ratio 2 \ --hicache-size 0 \ - --hicache-mem-layout page_first \ + --hicache-mem-layout page_first_direct \ + --hicache-io-backend direct \ --hicache-write-policy write_through \ --hicache-storage-backend hf3fs \ --hicache-storage-prefetch-policy wait_complete \ @@ -140,8 +143,8 @@ python3 -m sglang.launch_server \ --page-size 64 \ --enable-hierarchical-cache \ --hicache-ratio 2 \ - --hicache-mem-layout page_first \ - --hicache-io-backend kernel \ + --hicache-mem-layout page_first_direct \ + --hicache-io-backend direct \ --hicache-storage-backend mooncake \ --hicache-write-policy write_through \ --hicache-storage-prefetch-policy timeout diff --git a/python/sglang/srt/mem_cache/storage/backend_factory.py b/python/sglang/srt/mem_cache/storage/backend_factory.py index dd5da6a5c..4b195c863 100644 --- a/python/sglang/srt/mem_cache/storage/backend_factory.py +++ b/python/sglang/srt/mem_cache/storage/backend_factory.py @@ -170,7 +170,7 @@ class StorageBackendFactory: return backend elif backend_name == "hf3fs": # Calculate bytes_per_page based on memory pool layout - if mem_pool_host.layout == "page_first": + if mem_pool_host.layout in ["page_first", "page_first_direct"]: bytes_per_page = ( mem_pool_host.get_ksize_per_token() * mem_pool_host.page_size ) diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py b/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py index 1f8c58dbd..a789c2af8 100644 --- a/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +++ b/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py @@ -501,8 +501,12 @@ class HiCacheHF3FS(HiCacheStorage): def register_mem_pool_host(self, mem_pool_host: HostKVCache): super().register_mem_pool_host(mem_pool_host) - self.is_zero_copy = self.mem_pool_host.layout == "page_first" - logger.info(f"{self.is_zero_copy=}") + self.is_zero_copy = self.mem_pool_host.layout in [ + "page_first", + "page_first_direct", + ] + + logger.info(f"{self.is_zero_copy=}, layout={self.mem_pool_host.layout}") def _get_mha_zero_copy_keys(self, keys: List[str]) -> List[str]: _keys = [] diff --git a/test/srt/hicache/test_hicache_storage_3fs_backend.py b/test/srt/hicache/test_hicache_storage_3fs_backend.py index 362da4b73..d3dea5117 100644 --- a/test/srt/hicache/test_hicache_storage_3fs_backend.py +++ b/test/srt/hicache/test_hicache_storage_3fs_backend.py @@ -76,6 +76,8 @@ class TestHf3fsBackendAccuracy(HiCacheStorage3FSBackendBaseMixin, CustomTestCase server_args, env_vars = super()._get_additional_server_args_and_env() server_args["--hicache-ratio"] = 1.5 server_args["--tp-size"] = 2 + server_args["--hicache-mem-layout"] = "page_first_direct" + server_args["--hicache-io-backend"] = "direct" return server_args, env_vars def test_eval_accuracy(self):