From 6852a2e267e1ad6eafd4399b0b2d134a8f9dbe93 Mon Sep 17 00:00:00 2001
From: Junyuan <48611576+chloroethylene@users.noreply.github.com>
Date: Fri, 13 Mar 2026 17:41:35 +0800
Subject: [PATCH] [feat] add LMCacheAscendConnector (#6882)

### What this PR does / why we need it?

LMCache-Ascend is LMCache's solution on the Ascend platform and one of
the KVCache pooling solutions for Ascend. We hope to integrate
LMCache-Ascend into the vLLM-Ascend community as one of the official
KVCache pooling solutions for vLLM-Ascend.

We added a new LMCacheAscendConnector in vLLM-Ascend and registered it.

### Does this PR introduce _any_ user-facing change?

Users can specify the kvconnector using `--kv-transfer-config`, so they
can freely choose which kvconnector to use; no other user-facing behavior
changes.

### How was this patch tested?

Tested by specifying `--kv-transfer-config
'{"kv_connector":"LMCacheAscendConnector","kv_role":"kv_both"}'`

- vLLM version: v0.16.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/15d76f74e2fdb12a95ea00f0ca283acf6219a2b7

---------

Signed-off-by: chloroethylene <jjysama@gmail.com>
---
 docs/source/user_guide/feature_guide/index.md |  1 +
 .../lmcache_ascend_deployment.md              | 94 +++++++++++++++++++
 mypy.ini                                      |  3 +
 .../distributed/kv_transfer/__init__.py       |  6 ++
 .../kv_pool/lmcache_ascend_connector.py       |  5 +
 5 files changed, 109 insertions(+)
 create mode 100644 docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md
 create mode 100644 vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py

diff --git a/docs/source/user_guide/feature_guide/index.md b/docs/source/user_guide/feature_guide/index.md
index f5496cc9..e31e0e39 100644
--- a/docs/source/user_guide/feature_guide/index.md
+++ b/docs/source/user_guide/feature_guide/index.md
@@ -28,4 +28,5 @@ npugraph_ex
 weight_prefetch
 sequence_parallelism
 batch_invariance
+lmcache_ascend_deployment
 :::
diff --git a/docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md b/docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md
new file mode 100644
index 00000000..458f47f6
--- /dev/null
+++ b/docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md
@@ -0,0 +1,94 @@
+# LMCache-Ascend Deployment Guide
+
+## Overview
+
+LMCache-Ascend is a community-maintained plugin for running LMCache on the Ascend NPU.
+
+We provide a simple deployment guide here. For further deployment details, please refer to the [LMCache-Ascend documentation](https://github.com/LMCache/LMCache-Ascend/blob/main/README.md).
+
+## Getting Started
+
+### Clone LMCache-Ascend Repo
+
+Our repo contains a kvcache ops submodule for ease of maintenance, so we recommend cloning the repo with its submodules.
+
+```bash
+cd /workspace
+git clone --recurse-submodules https://github.com/LMCache/LMCache-Ascend.git
+```
+
+### Docker
+
+```bash
+cd /workspace/LMCache-Ascend
+docker build -f docker/Dockerfile.a2.openEuler -t lmcache-ascend:v0.3.12-vllm-ascend-v0.11.0-openeuler .
+```
+
+Once the image is built, run it with the following command:
+
+```bash
+DEVICE_LIST="0,1,2,3,4,5,6,7"
+docker run -it \
+    --privileged \
+    --cap-add=SYS_RESOURCE \
+    --cap-add=IPC_LOCK \
+    -p 8000:8000 \
+    -p 8001:8001 \
+    --name lmcache-ascend-dev \
+    -e ASCEND_VISIBLE_DEVICES=${DEVICE_LIST} \
+    -e ASCEND_RT_VISIBLE_DEVICES=${DEVICE_LIST} \
+    -e ASCEND_TOTAL_MEMORY_GB=32 \
+    -e VLLM_TARGET_DEVICE=npu \
+    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+    -v /etc/localtime:/etc/localtime \
+    -v /var/log/npu:/var/log/npu \
+    -v /dev/davinci_manager:/dev/davinci_manager \
+    -v /dev/devmm_svm:/dev/devmm_svm \
+    -v /etc/ascend_install.info:/etc/ascend_install.info \
+    -v /etc/hccn.conf:/etc/hccn.conf \
+    lmcache-ascend:v0.3.12-vllm-ascend-v0.11.0-openeuler \
+    /bin/bash
+```
+
+### Manual Installation
+
+The following steps assume that your working directory is `/workspace` and that vLLM and vLLM-Ascend have already been installed.
+
+1. Install LMCache
+
+```bash
+NO_CUDA_EXT=1 pip install lmcache==0.3.12
+```
+
+2. Install LMCache-Ascend from the cloned repo
+
+```bash
+cd /workspace/LMCache-Ascend
+python3 -m pip install -v --no-build-isolation -e .
+```
+
+### Usage
+
+We introduce a dynamic KVConnector via LMCacheAscendConnectorV1Dynamic; therefore, the LMCache-Ascend connector can be used via the KV transfer config in the following two settings.
+
+#### Online serving
+
+```bash
+python \
+    -m vllm.entrypoints.openai.api_server \
+    --port 8100 \
+    --model /data/models/Qwen/Qwen3-32B \
+    --trust-remote-code \
+    --disable-log-requests \
+    --block-size 128 \
+    --kv-transfer-config '{"kv_connector":"LMCacheAscendConnector","kv_role":"kv_both"}'
+```
+
+#### Offline
+
+```python
+ktc = KVTransferConfig(
+        kv_connector="LMCacheAscendConnector",
+        kv_role="kv_both"
+    )
+```
diff --git a/mypy.ini b/mypy.ini
index e9c2f8c4..996765b3 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -37,3 +37,6 @@ ignore_missing_imports = True
 
 [mypy-msmodelslim.*]
 ignore_missing_imports = True
+
+[mypy-lmcache_ascend.*]
+ignore_missing_imports = True
diff --git a/vllm_ascend/distributed/kv_transfer/__init__.py b/vllm_ascend/distributed/kv_transfer/__init__.py
index dae05787..45d50414 100644
--- a/vllm_ascend/distributed/kv_transfer/__init__.py
+++ b/vllm_ascend/distributed/kv_transfer/__init__.py
@@ -51,3 +51,9 @@ def register_connector():
     KVConnectorFactory.register_connector(
         "UCMConnector", "vllm_ascend.distributed.kv_transfer.kv_pool.ucm_connector", "UCMConnectorV1"
     )
+
+    KVConnectorFactory.register_connector(
+        "LMCacheAscendConnector",
+        "vllm_ascend.distributed.kv_transfer.kv_pool.lmcache_ascend_connector",
+        "LMCacheConnectorV1",
+    )
diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py
new file mode 100644
index 00000000..dabaa19b
--- /dev/null
+++ b/vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+import lmcache_ascend  # noqa: F401
+from vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector import LMCacheConnectorV1
+
+__all__ = ["LMCacheConnectorV1"]