[P/D][main] Retire the llmdatadist connector and its related code and files. (#4780)

### What this PR does / why we need it?
As support for the mooncake connector is now available, the llmdatadist
connector is no longer being maintained, so the llmdatadist-related
files need to be retired.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By CI.

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

---------

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
Signed-off-by: liziyu <liziyu16@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
wangxiaoteng888
2025-12-09 22:36:43 +08:00
committed by GitHub
parent 848419d1ba
commit a77045f355
19 changed files with 188 additions and 1819 deletions

View File

@@ -20,11 +20,6 @@ from vllm.distributed.kv_transfer.kv_connector.factory import \
def register_connector():
KVConnectorFactory.register_connector(
"LLMDataDistCMgrConnector",
"vllm_ascend.distributed.llmdatadist_c_mgr_connector",
"LLMDataDistCMgrConnector")
KVConnectorFactory.register_connector(
"MooncakeConnectorV1", "vllm_ascend.distributed.mooncake_connector",
"MooncakeConnector")

File diff suppressed because it is too large Load Diff

View File

@@ -103,23 +103,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
"VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION":
lambda: bool(
int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION", '1'))),
# `LLMDataDistCMgrConnector` required variable. `DISAGGREGATED_PREFILL_RANK_TABLE_PATH` is
# used for llmdatadist to build the communication topology for kv cache transfer, it is
# a required variable if `LLMDataDistCMgrConnector` is used as kv connector for disaggregated
# pd. The rank table can be generated by adopting the script `gen_ranktable.sh`
# in vllm_ascend's example folder.
"DISAGGREGATED_PREFILL_RANK_TABLE_PATH":
lambda: os.getenv("DISAGGREGATED_PREFILL_RANK_TABLE_PATH", None),
# `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_IP` is used as the
# rpc communication listening ip, which will be used to receive the agent metadata from the
# remote worker.
"VLLM_ASCEND_LLMDD_RPC_IP":
lambda: os.getenv("VLLM_ASCEND_LLMDD_RPC_IP", "0.0.0.0"),
# `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_PORT` is used as the
# rpc communication listening port, which will be used to receive the agent metadata from the
# remote worker.
"VLLM_ASCEND_LLMDD_RPC_PORT":
lambda: int(os.getenv("VLLM_ASCEND_LLMDD_RPC_PORT", 5557)),
# Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible
# and the mla_pa will be the default path of deepseek decode path.
"VLLM_ASCEND_MLA_PA":

View File

@@ -3398,7 +3398,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
# init kv cache tensors
kv_cache_raw_tensors: dict[str, Union[torch.Tensor,
Optional[torch.Tensor]]] = {}
# llmdatadist need the addr of cache tensor be aligned with 2M
# prefill disaggregation need the addr of cache tensor be aligned with 2M
alignment = 2 * 1024 * 1024
for kv_cache_tensor in kv_cache_config.kv_cache_tensors:
# TODO: REFACTOR ME to sharing hybrid cache
@@ -3426,7 +3426,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
elif "attn" in layer_name and layer_name not in kv_cache_raw_tensors.keys(
):
# NOTE: We need to init k cache tensor (nope cache tensor in mla) and
# v cache tensor (rope cache tensor in mla) separately to support llmdatadist,
# v cache tensor (rope cache tensor in mla) separately to support prefill disaggregation,
# as it only support the 0-dim of kv_cache is `num_blocks`.
# For deepseek mla, we need to split cache tensor according to the nope head dim
# and rope head dim.