diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/README.md b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md index 5385d5b25..fe0bb62af 100644 --- a/python/sglang/srt/mem_cache/storage/mooncake_store/README.md +++ b/python/sglang/srt/mem_cache/storage/mooncake_store/README.md @@ -119,7 +119,10 @@ Note: If `MOONCAKE_GLOBAL_SEGMENT_SIZE` is set to a non-zero value when starting Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations). -There are two ways to configure Mooncake: 1. Using environment variables; 2. Using extra-config of sglang arguments. +There are three ways to configure Mooncake: +1. Using environment variables; +2. Using a JSON configuration file; +3. Using extra-config of sglang arguments. **Using env variables to configure Mooncake** @@ -143,6 +146,21 @@ Parameter Explanation: * `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command. * `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value could be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors. 
+**Using JSON file to configure Mooncake** + +```bash +export SGLANG_HICACHE_MOONCAKE_CONFIG_PATH=/sgl-workspace/sglang/benchmark/hicache/mooncake_config.json +echo '{ + "local_hostname": "localhost", + "metadata_server": "http://localhost:8080/metadata", + "master_server_address": "localhost:50051", + "protocol": "rdma", + "device_name": "mlx5_0,mlx5_1", + "global_segment_size": 2684354560, + "local_buffer_size": 0 +}' > ${SGLANG_HICACHE_MOONCAKE_CONFIG_PATH} +``` + **Using extra-config of sglang arguments to configure Mooncake** ```bash diff --git a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py index d3ea38fd5..2704581e6 100644 --- a/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +++ b/python/sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py @@ -11,7 +11,7 @@ from sglang.srt.mem_cache.hicache_storage import HiCacheStorage, HiCacheStorageC DEFAULT_GLOBAL_SEGMENT_SIZE = 4 * 1024 * 1024 * 1024 # 4 GiB DEFAULT_LOCAL_BUFFER_SIZE = 16 * 1024 * 1024 # 16 MB - +DEFAULT_MOONCAKE_CONFIG_PATH_ENV = "SGLANG_HICACHE_MOONCAKE_CONFIG_PATH" logger = logging.getLogger(__name__) @@ -28,13 +28,13 @@ class MooncakeStoreConfig: @staticmethod def from_file() -> "MooncakeStoreConfig": """Load the config from a JSON file.""" - file_path = os.getenv("MOONCAKE_CONFIG_PATH") - if file_path is None: - raise ValueError( - "The environment variable 'MOONCAKE_CONFIG_PATH' is not set." 
- ) - with open(file_path) as fin: - config = json.load(fin) + file_path = os.getenv(DEFAULT_MOONCAKE_CONFIG_PATH_ENV) + try: + with open(file_path) as fin: + config = json.load(fin) + except Exception as e: + raise RuntimeError(f"Failed to load config from {file_path}: {str(e)}") + return MooncakeStoreConfig( local_hostname=config.get("local_hostname"), metadata_server=config.get("metadata_server"), @@ -101,6 +101,7 @@ class MooncakeStoreConfig: class MooncakeStore(HiCacheStorage): + def __init__(self, storage_config: HiCacheStorageConfig = None): try: from mooncake.store import MooncakeDistributedStore @@ -129,6 +130,10 @@ class MooncakeStore(HiCacheStorage): logger.info( "Mooncake Configuration loaded from extra_config successfully." ) + elif os.getenv(DEFAULT_MOONCAKE_CONFIG_PATH_ENV): + # Load from config file + self.config = MooncakeStoreConfig.from_file() + logger.info("Mooncake Configuration loaded from file successfully.") else: # Load from environment variables self.config = MooncakeStoreConfig.load_from_env()