diff --git a/vllm_ascend/distributed/kv_transfer/__init__.py b/vllm_ascend/distributed/kv_transfer/__init__.py index 7e3a0618..f2bf8d6f 100644 --- a/vllm_ascend/distributed/kv_transfer/__init__.py +++ b/vllm_ascend/distributed/kv_transfer/__init__.py @@ -41,5 +41,5 @@ def register_connector(): "MooncakeLayerwiseConnector") KVConnectorFactory.register_connector( - "UCMConnector", "vllm_ascend.distributed.kv_transfer.ucm_connector", + "UCMConnector", "vllm_ascend.distributed.kv_transfer.kv_pool.ucm_connector", "UCMConnectorV1") diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py index 19a3c0ff..60128eb6 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py @@ -23,7 +23,7 @@ from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheSpec, MambaSpec, MLAAttentionSpec) -from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.metadata import ( +from vllm_ascend.distributed.kv_transfer.kv_pool.cpu_offload.metadata import ( MetadataServer, MetadataServerProc, MLAConfig) from vllm_ascend.utils import vllm_version_is diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/metadata.py b/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/metadata.py similarity index 100% rename from vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/metadata.py rename to vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/metadata.py diff --git a/vllm_ascend/distributed/kv_transfer/ucm_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py similarity index 100% rename from vllm_ascend/distributed/kv_transfer/ucm_connector.py rename to vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py