[P/D][main] Retire the llmdatadist-connector-related code and files. (#4780)
### What this PR does / why we need it?
Now that the mooncake connector is supported, the llmdatadist connector is no
longer maintained, so the llmdatadist-related files are being retired.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By CI.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
Signed-off-by: liziyu <liziyu16@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>
```diff
@@ -20,11 +20,6 @@ from vllm.distributed.kv_transfer.kv_connector.factory import \
 
 def register_connector():
-    KVConnectorFactory.register_connector(
-        "LLMDataDistCMgrConnector",
-        "vllm_ascend.distributed.llmdatadist_c_mgr_connector",
-        "LLMDataDistCMgrConnector")
-
     KVConnectorFactory.register_connector(
         "MooncakeConnectorV1", "vllm_ascend.distributed.mooncake_connector",
         "MooncakeConnector")
 
```
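With the llmdatadist registration removed, `MooncakeConnectorV1` is the connector name that remains registered here. As a minimal sketch of how a registered connector name is selected on the user side, assuming vLLM's `KVTransferConfig` with `kv_connector`/`kv_role` fields (check the exact field names in your vLLM version; the model name is illustrative):

```python
from vllm import LLM
from vllm.config import KVTransferConfig

# Hypothetical prefill-side configuration; the decode side would use
# kv_role="kv_consumer" instead.
ktc = KVTransferConfig(
    kv_connector="MooncakeConnectorV1",  # name registered by register_connector()
    kv_role="kv_producer",
)
llm = LLM(model="Qwen/Qwen2.5-7B-Instruct", kv_transfer_config=ktc)
```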
(One file's diff is suppressed because it is too large.)
```diff
@@ -103,23 +103,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
     "VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION":
     lambda: bool(
         int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION", '1'))),
-    # `LLMDataDistCMgrConnector` required variable. `DISAGGREGATED_PREFILL_RANK_TABLE_PATH` is
-    # used for llmdatadist to build the communication topology for kv cache transfer, it is
-    # a required variable if `LLMDataDistCMgrConnector` is used as kv connector for disaggregated
-    # pd. The rank table can be generated by adopting the script `gen_ranktable.sh`
-    # in vllm_ascend's example folder.
-    "DISAGGREGATED_PREFILL_RANK_TABLE_PATH":
-    lambda: os.getenv("DISAGGREGATED_PREFILL_RANK_TABLE_PATH", None),
-    # `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_IP` is used as the
-    # rpc communication listening ip, which will be used to receive the agent metadata from the
-    # remote worker.
-    "VLLM_ASCEND_LLMDD_RPC_IP":
-    lambda: os.getenv("VLLM_ASCEND_LLMDD_RPC_IP", "0.0.0.0"),
-    # `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_PORT` is used as the
-    # rpc communication listening port, which will be used to receive the agent metadata from the
-    # remote worker.
-    "VLLM_ASCEND_LLMDD_RPC_PORT":
-    lambda: int(os.getenv("VLLM_ASCEND_LLMDD_RPC_PORT", 5557)),
     # Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible
     # and the mla_pa will be the default path of deepseek decode path.
     "VLLM_ASCEND_MLA_PA":
```
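The removed entries followed the lazy-lookup pattern used throughout this `env_variables` dict: each value is a zero-argument callable, so the environment is read at access time rather than at import time. A self-contained sketch of that pattern (the variable name below is illustrative, not part of vllm-ascend after this change):

```python
import os
from typing import Any, Callable, Dict

# Minimal sketch of the lazy env-var registry pattern shown above.
env_variables: Dict[str, Callable[[], Any]] = {
    "MY_EXAMPLE_RPC_PORT":
    lambda: int(os.getenv("MY_EXAMPLE_RPC_PORT", 5557)),
}

def resolve(name: str) -> Any:
    # Calling the stored lambda reads the environment lazily.
    return env_variables[name]()

print(resolve("MY_EXAMPLE_RPC_PORT"))  # 5557 unless overridden in the environment
```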
```diff
@@ -3398,7 +3398,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
         # init kv cache tensors
         kv_cache_raw_tensors: dict[str, Union[torch.Tensor,
                                               Optional[torch.Tensor]]] = {}
-        # llmdatadist need the addr of cache tensor be aligned with 2M
+        # prefill disaggregation need the addr of cache tensor be aligned with 2M
         alignment = 2 * 1024 * 1024
         for kv_cache_tensor in kv_cache_config.kv_cache_tensors:
             # TODO: REFACTOR ME to sharing hybrid cache
```
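Only the comment changes here; the 2 MiB address alignment itself stays, now attributed to prefill disaggregation in general rather than to llmdatadist specifically. For reference, the usual arithmetic for padding a buffer size up to that alignment looks like the following sketch (`raw_size` is an illustrative value, not taken from `kv_cache_config`):

```python
# Sketch of rounding a KV cache tensor size up to a 2 MiB boundary.
alignment = 2 * 1024 * 1024  # 2 MiB

def round_up(size: int, alignment: int) -> int:
    # Smallest multiple of `alignment` that is >= size.
    return (size + alignment - 1) // alignment * alignment

raw_size = 3_000_000
padded = round_up(raw_size, alignment)
assert padded % alignment == 0 and padded >= raw_size  # padded == 4_194_304
```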
```diff
@@ -3426,7 +3426,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
             elif "attn" in layer_name and layer_name not in kv_cache_raw_tensors.keys(
             ):
                 # NOTE: We need to init k cache tensor (nope cache tensor in mla) and
-                # v cache tensor (rope cache tensor in mla) separately to support llmdatadist,
+                # v cache tensor (rope cache tensor in mla) separately to support prefill disaggregation,
                 # as it only support the 0-dim of kv_cache is `num_blocks`.
                 # For deepseek mla, we need to spilt cache tensor accrodding to the nope head dim
                 # and rope head dim.
```
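Again only the comment's wording changes. The split it describes keeps `num_blocks` as dim 0 of each cache while separating the nope and rope portions along the last dimension; a rough, self-contained illustration of that idea (shapes and dims are hypothetical, not the actual NPUModelRunner code):

```python
import torch

# Illustrative only: split one raw MLA cache buffer into a "nope" cache and a
# "rope" cache, each keeping num_blocks as its leading dimension.
num_blocks, block_size = 4, 128   # hypothetical values
nope_dim, rope_dim = 512, 64      # hypothetical MLA head dims

raw = torch.zeros(num_blocks, block_size, nope_dim + rope_dim, dtype=torch.bfloat16)
nope_cache, rope_cache = raw.split([nope_dim, rope_dim], dim=-1)

assert nope_cache.shape[0] == rope_cache.shape[0] == num_blocks
```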