[Bugfix] Fix service startup failure when the memcache pool is enabled (#6229)

### What this PR does / why we need it?
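The service fails to start when the memcache pool is enabled but the
Mooncake path is not configured. This PR stops importing both the Mooncake
and memcache backend modules at module load time; instead, `KVPoolWorker`
imports only the module of the configured backend, via `importlib`.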

### Does this PR introduce _any_ user-facing change?
NA

### How was this patch tested?
```
#memcache
echo 200000 > /proc/sys/vm/nr_hugepages
source /usr/local/memfabric_hybrid/set_env.sh
source /usr/local/memcache_hybrid/set_env.sh
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
export MMC_LOCAL_CONFIG_PATH=/usr/local/memcache_hybrid/latest/config/mmc-local.conf

vllm serve /mnt/weight/DeepSeek-V3.2-Exp-W8A8 \
  --host $local_ip \
  --port 8002 \
  --served-model-name model \
  --data-parallel-size 2 \
  --tensor-parallel-size 8 \
  --enable-expert-parallel \
  --no-enable-prefix-caching \
  --no-enable-chunked-prefill \
  --max-num-seqs 4 \
  --max-model-len 8192 \
  --max-num-batched-tokens 8192 \
  --gpu-memory-utilization 0.9 \
  --trust-remote-code \
  --enforce-eager \
  --quantization ascend \
  --additional_config '{"ascend_scheduler_config":{"enabled":false}}' \
  --kv-transfer-config \
    '{
            "kv_connector": "AscendStoreConnector",
            "kv_role": "kv_both",
            "kv_connector_extra_config": {
                "backend": "memcache",
                "lookup_rpc_port": "0"
            }
    }'
```

- vLLM version: v0.14.0
- vLLM main: d68209402d

---------

Signed-off-by: lty <linhebiwen@gmail.com>
This commit is contained in:
lty
2026-02-02 16:26:18 +08:00
committed by GitHub
parent 460ea88276
commit 082aa2e5b7

View File

@@ -1,6 +1,7 @@
import importlib
import math import math
import threading import threading
from collections.abc import Callable, Generator from collections.abc import Generator
import torch import torch
from vllm.config import VllmConfig from vllm.config import VllmConfig
@@ -14,9 +15,6 @@ from vllm.distributed import (
from vllm.logger import logger from vllm.logger import logger
from vllm.v1.core.kv_cache_utils import BlockHash from vllm.v1.core.kv_cache_utils import BlockHash
from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.backend import Backend
from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.memcache_backend import MemcacheBackend
from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.mooncake_backend import MooncakeBackend
from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.config_data import ( from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.config_data import (
AscendConnectorMetadata, AscendConnectorMetadata,
ChunkedTokenDatabase, ChunkedTokenDatabase,
@@ -32,9 +30,15 @@ from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.kv_transfer import
KVTransferThread, KVTransferThread,
) )
backend_map: dict[str, Callable[..., Backend]] = { backend_map = {
"mooncake": MooncakeBackend, "mooncake": {
"memcache": MemcacheBackend, "name": "MooncakeBackend",
"path": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.mooncake_backend",
},
"memcache": {
"name": "MemcacheBackend",
"path": "vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.backend.memcache_backend",
},
} }
@@ -125,7 +129,13 @@ class KVPoolWorker:
self.token_database = ChunkedTokenDatabase(self.metadata, self.block_size, self.use_mla, partitions) self.token_database = ChunkedTokenDatabase(self.metadata, self.block_size, self.use_mla, partitions)
real_backend = backend_map.get(self.backend.lower()) backend = backend_map.get(self.backend.lower())
assert backend is not None
backend_path = backend.get("path")
backend_name = backend.get("name")
assert backend_path is not None and backend_name is not None
backend_module = importlib.import_module(backend_path)
real_backend = getattr(backend_module, backend_name)
# be removed later # be removed later
if self.backend == "mooncake": if self.backend == "mooncake":