From 6852a2e267e1ad6eafd4399b0b2d134a8f9dbe93 Mon Sep 17 00:00:00 2001 From: Junyuan <48611576+chloroethylene@users.noreply.github.com> Date: Fri, 13 Mar 2026 17:41:35 +0800 Subject: [PATCH] [feat] add LMCacheAscendConnector (#6882) ### What this PR does / why we need it? LMCache-Ascend is LMCache's solution on the Ascend platform and one of the KVCache pooling solutions for Ascend. We hope to integrate LMCache-Ascend into the vLLM-Ascend community as one of the official KVCache pooling solutions for vLLM-Ascend. We added a new LMCacheAscendConnector in vLLM-Ascend and registered it. ### Does this PR introduce _any_ user-facing change? Users can specify the kvconnector using `--kv-transfer-config`, allowing them to freely choose which kvconnector to use, without any user-facing change. ### How was this patch tested? Test by specifying `--kv-transfer-config '{"kv_connector":"LMCacheAscendConnector","kv_role":"kv_both"}'` - vLLM version: v0.16.0 - vLLM main: https://github.com/vllm-project/vllm/commit/15d76f74e2fdb12a95ea00f0ca283acf6219a2b7 --------- Signed-off-by: chloroethylene --- docs/source/user_guide/feature_guide/index.md | 1 + .../lmcache_ascend_deployment.md | 94 +++++++++++++++++++ mypy.ini | 3 + .../distributed/kv_transfer/__init__.py | 6 ++ .../kv_pool/lmcache_ascend_connector.py | 5 + 5 files changed, 109 insertions(+) create mode 100644 docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md create mode 100644 vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py diff --git a/docs/source/user_guide/feature_guide/index.md b/docs/source/user_guide/feature_guide/index.md index f5496cc9..e31e0e39 100644 --- a/docs/source/user_guide/feature_guide/index.md +++ b/docs/source/user_guide/feature_guide/index.md @@ -28,4 +28,5 @@ npugraph_ex weight_prefetch sequence_parallelism batch_invariance +lmcache_ascend_deployment ::: diff --git a/docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md 
b/docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md new file mode 100644 index 00000000..458f47f6 --- /dev/null +++ b/docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md @@ -0,0 +1,94 @@ +# LMCache-Ascend Deployment Guide + +## Overview + +LMCache-Ascend is a community-maintained plugin for running LMCache on the Ascend NPU. + +We provide a simple deployment guide here. For further deployment notes, please refer to the [LMCache-Ascend doc](https://github.com/LMCache/LMCache-Ascend/blob/main/README.md). + +## Getting Started + +### Clone LMCache-Ascend Repo + +Our repo contains a kvcache ops submodule for ease of maintenance; therefore, we recommend cloning the repo with submodules. + +```bash +cd /workspace +git clone --recurse-submodules https://github.com/LMCache/LMCache-Ascend.git +``` + +### Docker + +```bash +cd /workspace/LMCache-Ascend +docker build -f docker/Dockerfile.a2.openEuler -t lmcache-ascend:v0.3.12-vllm-ascend-v0.11.0-openeuler . +``` + +Once the image is built, run it with the following command: + +```bash +DEVICE_LIST="0,1,2,3,4,5,6,7" +docker run -it \ + --privileged \ + --cap-add=SYS_RESOURCE \ + --cap-add=IPC_LOCK \ + -p 8000:8000 \ + -p 8001:8001 \ + --name lmcache-ascend-dev \ + -e ASCEND_VISIBLE_DEVICES=${DEVICE_LIST} \ + -e ASCEND_RT_VISIBLE_DEVICES=${DEVICE_LIST} \ + -e ASCEND_TOTAL_MEMORY_GB=32 \ + -e VLLM_TARGET_DEVICE=npu \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /etc/localtime:/etc/localtime \ + -v /var/log/npu:/var/log/npu \ + -v /dev/davinci_manager:/dev/davinci_manager \ + -v /dev/devmm_svm:/dev/devmm_svm \ + -v /etc/ascend_install.info:/etc/ascend_install.info \ + -v /etc/hccn.conf:/etc/hccn.conf \ + lmcache-ascend:v0.3.12-vllm-ascend-v0.11.0-openeuler \ + /bin/bash +``` + +### Manual Installation + +The steps below assume that your working directory is ```/workspace``` and that vllm/vllm-ascend have already been installed. + +1. 
Install LMCache Repo + +```bash +NO_CUDA_EXT=1 pip install lmcache==0.3.12 +``` + +2. Install LMCache-Ascend Repo + +```bash +cd /workspace/LMCache-Ascend +python3 -m pip install -v --no-build-isolation -e . +``` + +### Usage + +We introduce a dynamic KVConnector via LMCacheAscendConnectorV1Dynamic; therefore, the LMCache-Ascend connector can be used via the KV transfer config in the following two settings. + +#### Online serving + +```bash +python \ + -m vllm.entrypoints.openai.api_server \ + --port 8100 \ + --model /data/models/Qwen/Qwen3-32B \ + --trust-remote-code \ + --disable-log-requests \ + --block-size 128 \ + --kv-transfer-config '{"kv_connector":"LMCacheAscendConnector","kv_role":"kv_both"}' +``` + +#### Offline + +```python +ktc = KVTransferConfig( + kv_connector="LMCacheAscendConnector", + kv_role="kv_both" + ) +``` diff --git a/mypy.ini b/mypy.ini index e9c2f8c4..996765b3 100644 --- a/mypy.ini +++ b/mypy.ini @@ -37,3 +37,6 @@ ignore_missing_imports = True [mypy-msmodelslim.*] ignore_missing_imports = True + +[mypy-lmcache_ascend.*] +ignore_missing_imports = True diff --git a/vllm_ascend/distributed/kv_transfer/__init__.py b/vllm_ascend/distributed/kv_transfer/__init__.py index dae05787..45d50414 100644 --- a/vllm_ascend/distributed/kv_transfer/__init__.py +++ b/vllm_ascend/distributed/kv_transfer/__init__.py @@ -51,3 +51,9 @@ def register_connector(): KVConnectorFactory.register_connector( "UCMConnector", "vllm_ascend.distributed.kv_transfer.kv_pool.ucm_connector", "UCMConnectorV1" ) + + KVConnectorFactory.register_connector( + "LMCacheAscendConnector", + "vllm_ascend.distributed.kv_transfer.kv_pool.lmcache_ascend_connector", + "LMCacheConnectorV1", + ) diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py new file mode 100644 index 00000000..dabaa19b --- /dev/null +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py @@ -0,0 +1,5 
@@ +# SPDX-License-Identifier: Apache-2.0 +import lmcache_ascend # noqa: F401 +from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector import LMCacheConnectorV1 + +__all__ = ["LMCacheConnectorV1"]