[P/D][main] Retire the llmdatadist connector related code and files. (#4780)

### What this PR does / why we need it?
Now that the mooncake connector is supported, the llmdatadist connector is no longer maintained, so the llmdatadist-related files need to be retired.
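
As a quick reference, the change in the disaggregated-prefill test boils down to swapping the `KVTransferConfig` from the llmdatadist connector to the mooncake connector. A minimal producer-side sketch (values taken from the updated test in this diff; the import path, port, and engine id are assumptions that vary per deployment):

```python
from vllm.config import KVTransferConfig  # assumed import path

# Prefill (producer) side; the decode side uses kv_role="kv_consumer"
# with its own kv_port and engine_id, as shown in the test diff below.
ktc = KVTransferConfig(
    kv_connector="MooncakeConnector",
    kv_role="kv_producer",
    kv_port="30000",
    engine_id="0",
    kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
    kv_connector_extra_config={
        "prefill": {"dp_size": 1, "tp_size": 1},
        "decode": {"dp_size": 1, "tp_size": 1},
    },
)
```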

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By CI.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
Signed-off-by: liziyu <liziyu16@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>

Author: wangxiaoteng888
Date: 2025-12-09 22:36:43 +08:00
Committed by: GitHub
Parent commit: 848419d1ba
Commit: a77045f355
19 changed files with 188 additions and 1819 deletions


@@ -24,6 +24,7 @@ from multiprocessing import Event, Process
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 def clean_up():
     import gc
@@ -37,9 +38,6 @@ def clean_up():
 def run_prefill(prefill_done, process_close):
-    # ranktable.json needs be generated using gen_ranktable.sh
-    # from the examples/disaggregated_prefill_v1 in the main branch.
-    os.environ['DISAGGREGATED_PREFILL_RANK_TABLE_PATH'] = "./ranktable.json"
     os.environ["ASCEND_RT_VISIBLE_DEVICES"] = "0"
     from vllm import LLM, SamplingParams
@@ -51,9 +49,22 @@ def run_prefill(prefill_done, process_close):
     ]
     sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=1)
-    ktc = KVTransferConfig(kv_connector="LLMDataDistCMgrConnector", kv_buffer_device="npu", kv_role="kv_producer",
-                           kv_parallel_size=1,
-                           kv_connector_module_path="vllm_ascend.distributed.llmdatadist_c_mgr_connector")
+    ktc = KVTransferConfig(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_producer",
+        kv_port="30000",
+        engine_id="0",
+        kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
+        kv_connector_extra_config={
+            "prefill": {
+                "dp_size": 1,
+                "tp_size": 1
+            },
+            "decode": {
+                "dp_size": 1,
+                "tp_size": 1
+            }
+        })
     # Set NPU memory utilization to 0.8
     llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
               kv_transfer_config=ktc,
@@ -79,10 +90,6 @@ def run_prefill(prefill_done, process_close):
 def run_decode(prefill_done):
-    os.environ['VLLM_ASCEND_LLMDD_RPC_PORT'] = '6634'
-    # ranktable.json needs be generated using gen_ranktable.sh
-    # from the examples/disaggregated_prefill_v1 module in the main branch.
-    os.environ['DISAGGREGATED_PREFILL_RANK_TABLE_PATH'] = "./ranktable.json"
     os.environ["ASCEND_RT_VISIBLE_DEVICES"] = "1"
     from vllm import LLM, SamplingParams
@@ -94,8 +101,22 @@ def run_decode(prefill_done):
     ]
     sampling_params = SamplingParams(temperature=0, top_p=0.95)
-    ktc = KVTransferConfig(kv_connector="LLMDataDistCMgrConnector", kv_buffer_device="npu", kv_role="kv_consumer",
-                           kv_parallel_size=1, kv_connector_module_path="vllm_ascend.distributed.llmdatadist_c_mgr_connector")
+    ktc = KVTransferConfig(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_consumer",
+        kv_port="30100",
+        engine_id="1",
+        kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
+        kv_connector_extra_config={
+            "prefill": {
+                "dp_size": 1,
+                "tp_size": 1
+            },
+            "decode": {
+                "dp_size": 1,
+                "tp_size": 1
+            }
+        })
     llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
               kv_transfer_config=ktc,