[HICache]: Support 3FS-Store with page_first_direct layout (#11460)
This commit is contained in:
@@ -67,6 +67,7 @@ python3 -m sglang.launch_server \
|
|||||||
--enable-hierarchical-cache \
|
--enable-hierarchical-cache \
|
||||||
--hicache-ratio 2 \
|
--hicache-ratio 2 \
|
||||||
--hicache-size 0 \
|
--hicache-size 0 \
|
||||||
|
--hicache-mem-layout page_first_direct \
|
||||||
--hicache-io-backend direct \
|
--hicache-io-backend direct \
|
||||||
--hicache-write-policy write_through \
|
--hicache-write-policy write_through \
|
||||||
--hicache-storage-backend hf3fs \
|
--hicache-storage-backend hf3fs \
|
||||||
@@ -86,6 +87,7 @@ python3 -m sglang.launch_server \
|
|||||||
--page-size 64 \
|
--page-size 64 \
|
||||||
--hicache-ratio 2 \
|
--hicache-ratio 2 \
|
||||||
--hicache-size 0 \
|
--hicache-size 0 \
|
||||||
|
--hicache-mem-layout page_first_direct \
|
||||||
--hicache-io-backend direct \
|
--hicache-io-backend direct \
|
||||||
--hicache-write-policy write_through \
|
--hicache-write-policy write_through \
|
||||||
--hicache-storage-backend hf3fs \
|
--hicache-storage-backend hf3fs \
|
||||||
@@ -115,7 +117,8 @@ python3 -m sglang.launch_server \
|
|||||||
--enable-hierarchical-cache \
|
--enable-hierarchical-cache \
|
||||||
--hicache-ratio 2 \
|
--hicache-ratio 2 \
|
||||||
--hicache-size 0 \
|
--hicache-size 0 \
|
||||||
--hicache-mem-layout page_first \
|
--hicache-mem-layout page_first_direct \
|
||||||
|
--hicache-io-backend direct \
|
||||||
--hicache-write-policy write_through \
|
--hicache-write-policy write_through \
|
||||||
--hicache-storage-backend hf3fs \
|
--hicache-storage-backend hf3fs \
|
||||||
--hicache-storage-prefetch-policy wait_complete \
|
--hicache-storage-prefetch-policy wait_complete \
|
||||||
@@ -140,8 +143,8 @@ python3 -m sglang.launch_server \
|
|||||||
--page-size 64 \
|
--page-size 64 \
|
||||||
--enable-hierarchical-cache \
|
--enable-hierarchical-cache \
|
||||||
--hicache-ratio 2 \
|
--hicache-ratio 2 \
|
||||||
--hicache-mem-layout page_first \
|
--hicache-mem-layout page_first_direct \
|
||||||
--hicache-io-backend kernel \
|
--hicache-io-backend direct \
|
||||||
--hicache-storage-backend mooncake \
|
--hicache-storage-backend mooncake \
|
||||||
--hicache-write-policy write_through \
|
--hicache-write-policy write_through \
|
||||||
--hicache-storage-prefetch-policy timeout
|
--hicache-storage-prefetch-policy timeout
|
||||||
|
|||||||
@@ -170,7 +170,7 @@ class StorageBackendFactory:
|
|||||||
return backend
|
return backend
|
||||||
elif backend_name == "hf3fs":
|
elif backend_name == "hf3fs":
|
||||||
# Calculate bytes_per_page based on memory pool layout
|
# Calculate bytes_per_page based on memory pool layout
|
||||||
if mem_pool_host.layout == "page_first":
|
if mem_pool_host.layout in ["page_first", "page_first_direct"]:
|
||||||
bytes_per_page = (
|
bytes_per_page = (
|
||||||
mem_pool_host.get_ksize_per_token() * mem_pool_host.page_size
|
mem_pool_host.get_ksize_per_token() * mem_pool_host.page_size
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -501,8 +501,12 @@ class HiCacheHF3FS(HiCacheStorage):
|
|||||||
|
|
||||||
def register_mem_pool_host(self, mem_pool_host: HostKVCache):
|
def register_mem_pool_host(self, mem_pool_host: HostKVCache):
|
||||||
super().register_mem_pool_host(mem_pool_host)
|
super().register_mem_pool_host(mem_pool_host)
|
||||||
self.is_zero_copy = self.mem_pool_host.layout == "page_first"
|
self.is_zero_copy = self.mem_pool_host.layout in [
|
||||||
logger.info(f"{self.is_zero_copy=}")
|
"page_first",
|
||||||
|
"page_first_direct",
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"{self.is_zero_copy=}, layout={self.mem_pool_host.layout}")
|
||||||
|
|
||||||
def _get_mha_zero_copy_keys(self, keys: List[str]) -> List[str]:
|
def _get_mha_zero_copy_keys(self, keys: List[str]) -> List[str]:
|
||||||
_keys = []
|
_keys = []
|
||||||
|
|||||||
@@ -76,6 +76,8 @@ class TestHf3fsBackendAccuracy(HiCacheStorage3FSBackendBaseMixin, CustomTestCase
|
|||||||
server_args, env_vars = super()._get_additional_server_args_and_env()
|
server_args, env_vars = super()._get_additional_server_args_and_env()
|
||||||
server_args["--hicache-ratio"] = 1.5
|
server_args["--hicache-ratio"] = 1.5
|
||||||
server_args["--tp-size"] = 2
|
server_args["--tp-size"] = 2
|
||||||
|
server_args["--hicache-mem-layout"] = "page_first_direct"
|
||||||
|
server_args["--hicache-io-backend"] = "direct"
|
||||||
return server_args, env_vars
|
return server_args, env_vars
|
||||||
|
|
||||||
def test_eval_accuracy(self):
|
def test_eval_accuracy(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user