From 8471e5e616a34e765a89aa70201828b55f91ed27 Mon Sep 17 00:00:00 2001 From: Teng Ma Date: Wed, 10 Sep 2025 03:50:00 +0800 Subject: [PATCH] [HiCache] feat: add mooncake backend extra config (#10213) --- .../storage/mooncake_store/README.md | 14 +++++++ .../storage/mooncake_store/mooncake_store.py | 42 ++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/README.md b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md index b1f408604..e815122bd 100644 --- a/python/sglang/srt/mem_cache/storage/mooncake_store/README.md +++ b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md @@ -103,6 +103,10 @@ Note: To get started quickly, if `MOONCAKE_GLOBAL_SEGMENT_SIZE` is set to a non- **Start the `SGLang server` with Mooncake enabled:** Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations). +There are two ways to configure Mooncake: (1) through environment variables, or (2) through the `--hicache-storage-backend-extra-config` argument of the SGLang server. + +**Using environment variables to configure Mooncake** + ```bash MOONCAKE_TE_META_DATA_SERVER="http://127.0.0.1:8080/metadata" \ MOONCAKE_MASTER=127.0.0.1:50051 \ @@ -123,6 +127,16 @@ Parameter Explanation: * `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command. * `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value could be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors. 
+**Using extra-config of sglang arguments to configure Mooncake** + +```bash +python -m sglang.launch_server \ + --enable-hierarchical-cache \ + --hicache-storage-backend mooncake \ + --model-path [model_path] \ + --hicache-storage-backend-extra-config '{"master_server_address": "127.0.0.1:50051", "local_hostname": "localhost", "metadata_server": "http://127.0.0.1:8080/metadata", "global_segment_size": 4294967296, "local_buffer_size": 16777216, "protocol": "rdma", "device_name": "mlx5_0,mlx5_1"}' +``` + **Important: Understanding Global Segment Size** `global_segment_size` for `store service` and `MOONCAKE_GLOBAL_SEGMENT_SIZE` for `SGLang service`: This parameter specifies the amount of memory each instance contributes to the distributed memory pool. The total memory available for KV cache storage across the cluster is the sum of the memory contributed by all instances. diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py index 55262971d..caab04b5c 100644 --- a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +++ b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py @@ -72,6 +72,26 @@ class MooncakeStoreConfig: master_server_address=os.getenv("MOONCAKE_MASTER"), ) + @staticmethod + def load_from_extra_config(extra_config: dict) -> "MooncakeStoreConfig": + """Build the config from extra_config; master_server_address is mandatory.""" + if not extra_config.get("master_server_address"): + raise ValueError("master_server_address is required in extra_config") + + return MooncakeStoreConfig( + local_hostname=extra_config.get("local_hostname", "localhost"), + metadata_server=extra_config.get("metadata_server", "P2PHANDSHAKE"), + global_segment_size=extra_config.get( + "global_segment_size", DEFAULT_GLOBAL_SEGMENT_SIZE + ), + local_buffer_size=extra_config.get( + "local_buffer_size", DEFAULT_LOCAL_BUFFER_SIZE + ), + protocol=extra_config.get("protocol", "tcp"), 
+ device_name=extra_config.get("device_name", "auto"), + master_server_address=extra_config["master_server_address"], + ) + def __post_init__(self): if self.device_name == "auto": os.environ["MC_MS_AUTO_DISC"] = "1" @@ -93,8 +113,26 @@ class MooncakeStore(HiCacheStorage): try: self.store = MooncakeDistributedStore() - self.config = MooncakeStoreConfig.load_from_env() - logger.info("Mooncake Configuration loaded from env successfully.") + + extra_config = ( + getattr(storage_config, "extra_config", None) + if storage_config + else None + ) + # Load configuration with master_server_address prioritized from extra_config if available + if ( + extra_config is not None + and extra_config.get("master_server_address") is not None + ): + # Load from extra_config + self.config = MooncakeStoreConfig.load_from_extra_config(extra_config) + logger.info( + "Mooncake Configuration loaded from extra_config successfully." + ) + else: + # Load from environment variables + self.config = MooncakeStoreConfig.load_from_env() + logger.info("Mooncake Configuration loaded from env successfully.") ret_code = self.store.setup( self.config.local_hostname,