From 3cb0af0bcf3299089ca7e72159fa36e825a470f8 Mon Sep 17 00:00:00 2001 From: lty Date: Thu, 15 Jan 2026 16:26:53 +0800 Subject: [PATCH] [Refactor]Refactor of vllm_ascend/distributed module (#5910) ### What this PR does / why we need it? Based on the RFC:https://github.com/vllm-project/vllm-ascend/issues/5604 This PR is a refactoring of vllm_ascend/distributed. ### Does this PR introduce _any_ user-facing change? NA ### How was this patch tested? - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/11b6af5280d6d6dfb8953af16e67b25f819b3be9 Signed-off-by: lty --- vllm_ascend/distributed/kv_transfer/__init__.py | 2 +- .../kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py | 2 +- .../kv_pool/{ascend_store => cpu_offload}/metadata.py | 0 .../distributed/kv_transfer/{ => kv_pool}/ucm_connector.py | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename vllm_ascend/distributed/kv_transfer/kv_pool/{ascend_store => cpu_offload}/metadata.py (100%) rename vllm_ascend/distributed/kv_transfer/{ => kv_pool}/ucm_connector.py (100%) diff --git a/vllm_ascend/distributed/kv_transfer/__init__.py b/vllm_ascend/distributed/kv_transfer/__init__.py index 7e3a0618..f2bf8d6f 100644 --- a/vllm_ascend/distributed/kv_transfer/__init__.py +++ b/vllm_ascend/distributed/kv_transfer/__init__.py @@ -41,5 +41,5 @@ def register_connector(): "MooncakeLayerwiseConnector") KVConnectorFactory.register_connector( - "UCMConnector", "vllm_ascend.distributed.kv_transfer.ucm_connector", + "UCMConnector", "vllm_ascend.distributed.kv_transfer.kv_pool.ucm_connector", "UCMConnectorV1") diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py index 19a3c0ff..60128eb6 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py @@ -23,7 +23,7 @@ from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheSpec, MambaSpec, MLAAttentionSpec) -from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.metadata import ( +from vllm_ascend.distributed.kv_transfer.kv_pool.cpu_offload.metadata import ( MetadataServer, MetadataServerProc, MLAConfig) from vllm_ascend.utils import vllm_version_is diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/metadata.py b/vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/metadata.py similarity index 100% rename from vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/metadata.py rename to vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/metadata.py diff --git a/vllm_ascend/distributed/kv_transfer/ucm_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py similarity index 100% rename from vllm_ascend/distributed/kv_transfer/ucm_connector.py rename to vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py