Hicache L3 backend mooncake optimization configuration reading method (#10319)
Co-authored-by: Teng Ma <sima.mt@alibaba-inc.com> Co-authored-by: huangtingwei <141888744+huangtingwei9988@users.noreply.github.com> Co-authored-by: shicang <shicang@shicang> Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
@@ -119,7 +119,10 @@ Note: If `MOONCAKE_GLOBAL_SEGMENT_SIZE` is set to a non-zero value when starting
|
|||||||
|
|
||||||
Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations).
|
Mooncake configuration can be provided via environment variables. Note that, for optimal performance, the Mooncake backend currently supports only the `page_first` layout (which optimizes memory access patterns for KV cache operations).
|
||||||
|
|
||||||
There are two ways to configure Mooncake: 1. Using environment variables; 2. Using extra-config of sglang arguments.
|
There are three ways to configure Mooncake:
|
||||||
|
1. Use environment variables;
|
||||||
|
2. Use a JSON configuration file;
|
||||||
|
3. Use the extra-config of sglang arguments.
|
||||||
|
|
||||||
**Using env variables to configure Mooncake**
|
**Using env variables to configure Mooncake**
|
||||||
|
|
||||||
@@ -143,6 +146,21 @@ Parameter Explanation:
|
|||||||
* `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command.
|
* `MOONCAKE_DEVICE`: The RDMA devices used by Mooncake. This parameter is required only when the protocol is set to `"rdma"`. Available devices can be listed using the `ibv_devices` command.
|
||||||
* `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value could be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors.
|
* `MOONCAKE_GLOBAL_SEGMENT_SIZE`: The amount of memory (in bytes) contributed to the global memory pool. If at least one `store service` is launched, then this value could be set to `0`. In this case, the `SGLang server` will not contribute any memory to the system. Note that KV tensors cached in the contributed memory will be lost once this process terminates; however, this will not cause any system errors.
|
||||||
|
|
||||||
|
**Using a JSON file to configure Mooncake**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export SGLANG_HICACHE_MOONCAKE_CONFIG_PATH=/sgl-workspace/sglang/benchmark/hicache/mooncake_config.json
|
||||||
|
echo '{
|
||||||
|
"local_hostname": "localhost",
|
||||||
|
"metadata_server": "http://localhost:8080/metadata",
|
||||||
|
"master_server_address": "localhost:50051",
|
||||||
|
"protocol": "rdma",
|
||||||
|
"device_name": "mlx5_0,mlx5_1",
|
||||||
|
"global_segment_size": 2684354560,
|
||||||
|
"local_buffer_size": 0
|
||||||
|
}' > ${SGLANG_HICACHE_MOONCAKE_CONFIG_PATH}
|
||||||
|
```
|
||||||
|
|
||||||
**Using extra-config of sglang arguments to configure Mooncake**
|
**Using extra-config of sglang arguments to configure Mooncake**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from sglang.srt.mem_cache.hicache_storage import HiCacheStorage, HiCacheStorageC
|
|||||||
|
|
||||||
DEFAULT_GLOBAL_SEGMENT_SIZE = 4 * 1024 * 1024 * 1024 # 4 GiB
|
DEFAULT_GLOBAL_SEGMENT_SIZE = 4 * 1024 * 1024 * 1024 # 4 GiB
|
||||||
DEFAULT_LOCAL_BUFFER_SIZE = 16 * 1024 * 1024 # 16 MB
|
DEFAULT_LOCAL_BUFFER_SIZE = 16 * 1024 * 1024 # 16 MB
|
||||||
|
DEFAULT_MOONCAKE_CONFIG_PATH_ENV = "SGLANG_HICACHE_MOONCAKE_CONFIG_PATH"
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@@ -28,13 +28,13 @@ class MooncakeStoreConfig:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def from_file() -> "MooncakeStoreConfig":
|
def from_file() -> "MooncakeStoreConfig":
|
||||||
"""Load the config from a JSON file."""
|
"""Load the config from a JSON file."""
|
||||||
file_path = os.getenv("MOONCAKE_CONFIG_PATH")
|
file_path = os.getenv(DEFAULT_MOONCAKE_CONFIG_PATH_ENV)
|
||||||
if file_path is None:
|
try:
|
||||||
raise ValueError(
|
|
||||||
"The environment variable 'MOONCAKE_CONFIG_PATH' is not set."
|
|
||||||
)
|
|
||||||
with open(file_path) as fin:
|
with open(file_path) as fin:
|
||||||
config = json.load(fin)
|
config = json.load(fin)
|
||||||
|
except Exception as e:
|
||||||
|
raise RuntimeError(f"Failed to load config from {file_path}: {str(e)}")
|
||||||
|
|
||||||
return MooncakeStoreConfig(
|
return MooncakeStoreConfig(
|
||||||
local_hostname=config.get("local_hostname"),
|
local_hostname=config.get("local_hostname"),
|
||||||
metadata_server=config.get("metadata_server"),
|
metadata_server=config.get("metadata_server"),
|
||||||
@@ -101,6 +101,7 @@ class MooncakeStoreConfig:
|
|||||||
|
|
||||||
|
|
||||||
class MooncakeStore(HiCacheStorage):
|
class MooncakeStore(HiCacheStorage):
|
||||||
|
|
||||||
def __init__(self, storage_config: HiCacheStorageConfig = None):
|
def __init__(self, storage_config: HiCacheStorageConfig = None):
|
||||||
try:
|
try:
|
||||||
from mooncake.store import MooncakeDistributedStore
|
from mooncake.store import MooncakeDistributedStore
|
||||||
@@ -129,6 +130,10 @@ class MooncakeStore(HiCacheStorage):
|
|||||||
logger.info(
|
logger.info(
|
||||||
"Mooncake Configuration loaded from extra_config successfully."
|
"Mooncake Configuration loaded from extra_config successfully."
|
||||||
)
|
)
|
||||||
|
elif os.getenv(DEFAULT_MOONCAKE_CONFIG_PATH_ENV):
|
||||||
|
# Load from config file
|
||||||
|
self.config = MooncakeStoreConfig.from_file()
|
||||||
|
logger.info("Mooncake Configuration loaded from file successfully.")
|
||||||
else:
|
else:
|
||||||
# Load from environment variables
|
# Load from environment variables
|
||||||
self.config = MooncakeStoreConfig.load_from_env()
|
self.config = MooncakeStoreConfig.load_from_env()
|
||||||
|
|||||||
Reference in New Issue
Block a user