feat(hicache-3fs): 3FS-Store Backup Optimizations For MLA Model. (#9692)
@@ -125,6 +125,7 @@ class HiCacheHF3FS(HiCacheStorage):
        entries: int,
        dtype: torch.dtype,
        metadata_client: Hf3fsMetadataInterface,
        is_mla_model: bool = False,
    ):
        self.rank = rank
        self.file_path = file_path
@@ -134,9 +135,13 @@ class HiCacheHF3FS(HiCacheStorage):
        self.entries = entries
        self.dtype = dtype
        self.metadata_client = metadata_client

        self.is_mla_model = is_mla_model
        self.numel = self.bytes_per_page // self.dtype.itemsize
        self.num_pages = self.file_size // self.bytes_per_page
        self.skip_backup = False
        if self.is_mla_model and self.rank != 0:
            self.skip_backup = True
            self.rank = 0

        logger.info(
            f"[Rank {self.rank}] HiCacheHF3FS Client Initializing: "
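The constructor change above is the core of the optimization: with MLA the latent KV cache is replicated rather than sharded across tensor-parallel ranks, so every rank would otherwise persist identical pages. Instead, non-zero ranks mark themselves skip_backup and remap onto rank 0's file and metadata namespace so reads still resolve. A minimal sketch of that rule, written as a hypothetical standalone helper rather than the class itself:

def resolve_backup_role(rank: int, is_mla_model: bool) -> tuple:
    # Mirrors the __init__ logic above: non-zero MLA ranks skip writes and
    # reuse rank 0's file / metadata namespace for reads.
    skip_backup = False
    if is_mla_model and rank != 0:
        skip_backup = True
        rank = 0
    return rank, skip_backup

# Hypothetical usage, for illustration only:
assert resolve_backup_role(3, is_mla_model=True) == (0, True)
assert resolve_backup_role(3, is_mla_model=False) == (3, False)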
@@ -209,10 +214,14 @@ class HiCacheHF3FS(HiCacheStorage):
            raise ValueError(f"Missing required keys in config: {missing_keys}")

        # Choose metadata client based on configuration
        is_mla_model = False
        if "metadata_server_url" in config and config["metadata_server_url"]:
            # Use global metadata client to connect to metadata server
            metadata_server_url = config["metadata_server_url"]
            metadata_client = Hf3fsGlobalMetadataClient(metadata_server_url)

            # Enable MLA optimization only when using the global metadata client
            is_mla_model = storage_config.is_mla_model if storage_config else False
            logger.info(
                f"Using global metadata client with server url: {metadata_server_url}"
            )
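The branch above only honors the MLA flag when a metadata server URL is configured, since pages shared by all ranks need metadata that every rank can see. A sketch of that decision, with the fallback client passed in as a parameter because the else-branch is cut off by this hunk; apart from Hf3fsGlobalMetadataClient, the names here are illustrative:

def choose_metadata_client(config, storage_config, global_client_cls, local_client_cls):
    # MLA sharing is only enabled together with the global metadata client,
    # mirroring the hunk above; the local fallback is an assumed placeholder.
    is_mla_model = False
    if config.get("metadata_server_url"):
        client = global_client_cls(config["metadata_server_url"])
        is_mla_model = storage_config.is_mla_model if storage_config else False
    else:
        client = local_client_cls()
    return client, is_mla_model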
@@ -222,13 +231,15 @@ class HiCacheHF3FS(HiCacheStorage):

        return HiCacheHF3FS(
            rank=rank,
            file_path=f"{config['file_path_prefix']}.{rank}.bin",
            # Let all ranks use the same file path for MLA model
            file_path=f"{config['file_path_prefix']}.{rank if not is_mla_model else 0}.bin",
            file_size=int(config["file_size"]),
            numjobs=int(config["numjobs"]),
            bytes_per_page=bytes_per_page,
            entries=int(config["entries"]),
            dtype=dtype,
            metadata_client=metadata_client,
            is_mla_model=is_mla_model,
        )

    def get(
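The behavioural change in the factory above is the file path: for an MLA model every rank opens rank 0's backing file instead of its own. A sketch of just that rule, as a hypothetical helper with an illustrative prefix:

def backing_file_path(file_path_prefix: str, rank: int, is_mla_model: bool) -> str:
    # MLA collapses all ranks onto the rank-0 file; otherwise one file per rank.
    return f"{file_path_prefix}.{rank if not is_mla_model else 0}.bin"

assert backing_file_path("/data/hicache", 2, is_mla_model=True) == "/data/hicache.0.bin"
assert backing_file_path("/data/hicache", 2, is_mla_model=False) == "/data/hicache.2.bin"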
@@ -312,6 +323,10 @@ class HiCacheHF3FS(HiCacheStorage):
        target_locations: Optional[Any] = None,
        target_sizes: Optional[Any] = None,
    ) -> bool:
        # In MLA backend, only one rank needs to backup the KV cache
        if self.skip_backup:
            return True

        # Todo: Add prefix block's hash key
        key_with_prefix = [(key, "") for key in keys]
        indices = self.metadata_client.reserve_and_allocate_page_indices(
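The guard above turns the write path into a no-op on non-zero MLA ranks while still reporting success, so callers do not retry or treat the skipped backup as a failure; rank 0 performs the single real write. A trimmed-down sketch of that contract, with hypothetical names standing in for the page reservation and I/O that the real method goes on to do:

def batch_set_sketch(storage, keys, values) -> bool:
    if storage.skip_backup:
        # Non-zero MLA ranks: the shared file is owned by rank 0, so report
        # success without touching storage or metadata.
        return True
    key_with_prefix = [(key, "") for key in keys]  # prefix-block hash still a Todo above
    return storage._write_pages(key_with_prefix, values)  # placeholder for the real I/O path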
@@ -363,16 +378,21 @@ class HiCacheHF3FS(HiCacheStorage):

        return all(results)

    @synchronized()
    def delete(self, key: str) -> None:
        self.metadata_client.delete_keys(self.rank, [key])

    @synchronized()
    def exists(self, key: str) -> bool:
        result = self.metadata_client.exists(self.rank, [key])
        return result[0] if result else False

    @synchronized()
    def batch_exists(self, keys: List[str]) -> int:
        results = self.metadata_client.exists(self.rank, keys)
        for i in range(len(keys)):
            if not results[i]:
                return i

        return len(keys)

    def clear(self) -> None:
        self.metadata_client.clear(self.rank)
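Note that batch_exists above returns a prefix length, not a per-key mask: it reports how many keys from the front of the list are already stored and stops at the first miss, presumably because prefix-cache lookups only care about the contiguous leading run of hits. A standalone illustration of that contract, with a plain set standing in for the metadata client:

def count_existing_prefix(keys, stored) -> int:
    # Same contract as batch_exists: length of the leading run of hits.
    for i, key in enumerate(keys):
        if key not in stored:
            return i
    return len(keys)

assert count_existing_prefix(["a", "b", "c"], {"a", "b"}) == 2
assert count_existing_prefix(["x", "y"], {"y"}) == 0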