From 4d89389c4fc6ee4cea912aface345b6822976674 Mon Sep 17 00:00:00 2001
From: hzh0425
Date: Wed, 3 Sep 2025 02:30:11 +0800
Subject: [PATCH] Fix the key passing issue in page first layout. (#9929)

---
 .../sglang/srt/managers/cache_controller.py   |  1 +
 .../sglang/srt/mem_cache/hicache_storage.py   |  1 +
 .../mem_cache/storage/hf3fs/storage_hf3fs.py  | 32 +++++++++++++++----
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/python/sglang/srt/managers/cache_controller.py b/python/sglang/srt/managers/cache_controller.py
index 93a6d7b2e..ca441b9f6 100644
--- a/python/sglang/srt/managers/cache_controller.py
+++ b/python/sglang/srt/managers/cache_controller.py
@@ -407,6 +407,7 @@ class HiCacheController:
             tp_rank=self.tp_rank,
             tp_size=self.tp_size,
             is_mla_model=is_mla_backend,
+            is_page_first_layout=self.mem_pool_host.layout == "page_first",
             model_name=model_name,
             extra_config=extra_config,
         )
diff --git a/python/sglang/srt/mem_cache/hicache_storage.py b/python/sglang/srt/mem_cache/hicache_storage.py
index 159c70012..1d3ed5ae9 100644
--- a/python/sglang/srt/mem_cache/hicache_storage.py
+++ b/python/sglang/srt/mem_cache/hicache_storage.py
@@ -27,6 +27,7 @@ class HiCacheStorageConfig:
     tp_rank: int
     tp_size: int
     is_mla_model: bool
+    is_page_first_layout: bool
     model_name: Optional[str]
     extra_config: Optional[dict] = None
 
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py b/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
index a30230cdc..fe27673c4 100644
--- a/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py
@@ -128,6 +128,7 @@ class HiCacheHF3FS(HiCacheStorage):
         dtype: torch.dtype,
         metadata_client: Hf3fsMetadataInterface,
         is_mla_model: bool = False,
+        is_page_first_layout: bool = False,
     ):
         self.rank = rank
         self.file_path = file_path
@@ -138,6 +139,7 @@ class HiCacheHF3FS(HiCacheStorage):
         self.dtype = dtype
         self.metadata_client = metadata_client
         self.is_mla_model = is_mla_model
+        self.is_page_first_layout = is_page_first_layout
         self.numel = self.bytes_per_page // self.dtype.itemsize
         self.num_pages = self.file_size // self.bytes_per_page
         self.skip_backup = False
@@ -193,9 +195,13 @@ class HiCacheHF3FS(HiCacheStorage):
         )
 
         if storage_config is not None:
-            rank, is_mla_model = storage_config.tp_rank, storage_config.is_mla_model
+            rank, is_mla_model, is_page_first_layout = (
+                storage_config.tp_rank,
+                storage_config.is_mla_model,
+                storage_config.is_page_first_layout,
+            )
         else:
-            rank, is_mla_model = 0, False
+            rank, is_mla_model, is_page_first_layout = 0, False, False
 
         mla_unsupported_msg = f"MLA model is not supported without global metadata server, please refer to https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/mem_cache/storage/hf3fs/docs/deploy_sglang_3fs_multinode.md"
 
@@ -213,6 +219,7 @@ class HiCacheHF3FS(HiCacheStorage):
                 entries=8,
                 dtype=dtype,
                 metadata_client=Hf3fsLocalMetadataClient(),
+                is_page_first_layout=is_page_first_layout,
             )
 
         try:
@@ -261,6 +268,7 @@ class HiCacheHF3FS(HiCacheStorage):
             dtype=dtype,
             metadata_client=metadata_client,
             is_mla_model=is_mla_model,
+            is_page_first_layout=is_page_first_layout,
         )
 
     def get(
@@ -407,12 +415,22 @@ class HiCacheHF3FS(HiCacheStorage):
         return result[0] if result else False
 
     def batch_exists(self, keys: List[str]) -> int:
-        results = self.metadata_client.exists(self.rank, keys)
-        for i in range(len(keys)):
-            if not results[i]:
-                return i
+        if self.is_page_first_layout and not self.is_mla_model:
+            query_keys = []
+            # Compatible with page_first layout's key format, Refer to memory_pool_host.py#get_buffer_with_hash
+            for key in keys:
+                query_keys.append(f"{key}-k")
+                query_keys.append(f"{key}-v")
+            key_multiplier = 2
+        else:
+            query_keys = keys
+            key_multiplier = 1
 
-        return len(keys)
+        exist_result = self.metadata_client.exists(self.rank, query_keys)
+        for i in range(len(query_keys)):
+            if not exist_result[i]:
+                return i // key_multiplier
+        return len(query_keys) // key_multiplier
 
     def clear(self) -> bool:
         try: