diff --git a/python/sglang/srt/mem_cache/hicache_storage.py b/python/sglang/srt/mem_cache/hicache_storage.py index de069c4b7..8ebdecfda 100644 --- a/python/sglang/srt/mem_cache/hicache_storage.py +++ b/python/sglang/srt/mem_cache/hicache_storage.py @@ -33,8 +33,7 @@ class HiCacheStorage(ABC): It abstracts the underlying storage mechanism, allowing different implementations to be used. """ - # todo, translate tensor object access for different TP ranks - # potentially pass model and TP configs into storage backend + # todo, potentially pass model and TP configs into storage backend # todo, the page size of storage backend does not have to be the same as the same as host memory pool @abstractmethod @@ -117,35 +116,28 @@ class HiCacheFile(HiCacheStorage): def get( self, key: str, - target_location: Optional[Any] = None, + target_location: torch.Tensor, target_sizes: Optional[Any] = None, ) -> torch.Tensor | None: key = self._get_suffixed_key(key) tensor_path = os.path.join(self.file_path, f"{key}.bin") try: - if target_location is not None: - # Load directly into target_location's memory buffer - with open(tensor_path, "rb") as f: - target_location.set_( - torch.frombuffer(f.read(), dtype=target_location.dtype) - .reshape(target_location.shape) - .storage() - ) - return target_location - else: - loaded_tensor = torch.load(tensor_path) - if isinstance(loaded_tensor, torch.Tensor): - return loaded_tensor - else: - logger.error(f"Loaded data for key {key} is not a tensor.") - return None + # Load directly into target_location's memory buffer + with open(tensor_path, "rb") as f: + target_location.set_( + torch.frombuffer(f.read(), dtype=target_location.dtype) + .reshape(target_location.shape) + .untyped_storage() + ) + return target_location except FileNotFoundError: + logger.warning(f"Failed to fetch {key} from HiCacheFile storage.") return None def batch_get( self, keys: List[str], - target_locations: Optional[Any] = None, + target_locations: List[torch.Tensor], target_sizes: Optional[Any] = None, ) -> List[torch.Tensor | None]: return [ @@ -168,7 +160,7 @@ class HiCacheFile(HiCacheStorage): logger.debug(f"Key {key} already exists. Skipped.") return True try: - torch.save(value, tensor_path) + value.contiguous().view(dtype=torch.uint8).numpy().tofile(tensor_path) return True except Exception as e: logger.error(f"Failed to save tensor {key}: {e}") diff --git a/python/sglang/srt/mem_cache/mooncake_store/README.md b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md similarity index 100% rename from python/sglang/srt/mem_cache/mooncake_store/README.md rename to python/sglang/srt/mem_cache/storage/mooncake_store/README.md diff --git a/python/sglang/srt/mem_cache/mooncake_store/mooncake_store.py b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py similarity index 100% rename from python/sglang/srt/mem_cache/mooncake_store/mooncake_store.py rename to python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py diff --git a/python/sglang/srt/mem_cache/mooncake_store/unit_test.py b/python/sglang/srt/mem_cache/storage/mooncake_store/unit_test.py similarity index 100% rename from python/sglang/srt/mem_cache/mooncake_store/unit_test.py rename to python/sglang/srt/mem_cache/storage/mooncake_store/unit_test.py diff --git a/python/sglang/srt/mem_cache/nixl/README.md b/python/sglang/srt/mem_cache/storage/nixl/README.md similarity index 100% rename from python/sglang/srt/mem_cache/nixl/README.md rename to python/sglang/srt/mem_cache/storage/nixl/README.md diff --git a/python/sglang/srt/mem_cache/nixl/hicache_nixl.py b/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py similarity index 100% rename from python/sglang/srt/mem_cache/nixl/hicache_nixl.py rename to python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py diff --git a/python/sglang/srt/mem_cache/nixl/nixl_utils.py b/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py similarity index 100% rename from python/sglang/srt/mem_cache/nixl/nixl_utils.py rename to python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py diff --git a/python/sglang/srt/mem_cache/nixl/test_hicache_nixl_storage.py b/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py similarity index 100% rename from python/sglang/srt/mem_cache/nixl/test_hicache_nixl_storage.py rename to python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py