[P/D][main] Retire the llmdatadist connector related code and files. (#4780)

### What this PR does / why we need it?
Now that the mooncake connector is supported, the llmdatadist connector is no longer maintained, so the llmdatadist-related files need to be retired.
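
As a quick reference, the change in the disaggregated-prefill test boils down to swapping the `KVTransferConfig` from the llmdatadist connector to the mooncake connector. A minimal producer-side sketch (values taken from the updated test in this diff; the import path, port, and engine id are assumptions that vary per deployment):

```python
from vllm.config import KVTransferConfig  # assumed import path

# Prefill (producer) side; the decode side uses kv_role="kv_consumer"
# with its own kv_port and engine_id, as shown in the test diff below.
ktc = KVTransferConfig(
    kv_connector="MooncakeConnector",
    kv_role="kv_producer",
    kv_port="30000",
    engine_id="0",
    kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
    kv_connector_extra_config={
        "prefill": {"dp_size": 1, "tp_size": 1},
        "decode": {"dp_size": 1, "tp_size": 1},
    },
)
```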

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By CI.

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
Signed-off-by: liziyu <liziyu16@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>

Author: wangxiaoteng888
Date: 2025-12-09 22:36:43 +08:00
Committed by: GitHub
Parent commit: 848419d1ba
Commit: a77045f355
19 changed files with 188 additions and 1819 deletions


@@ -24,6 +24,7 @@ from multiprocessing import Event, Process
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
 def clean_up():
     import gc
@@ -37,9 +38,6 @@ def clean_up():
 def run_prefill(prefill_done, process_close):
-    # ranktable.json needs be generated using gen_ranktable.sh
-    # from the examples/disaggregated_prefill_v1 in the main branch.
-    os.environ['DISAGGREGATED_PREFILL_RANK_TABLE_PATH'] = "./ranktable.json"
     os.environ["ASCEND_RT_VISIBLE_DEVICES"] = "0"
     from vllm import LLM, SamplingParams
@@ -51,9 +49,22 @@ def run_prefill(prefill_done, process_close):
     ]
     sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=1)
-    ktc = KVTransferConfig(kv_connector="LLMDataDistCMgrConnector", kv_buffer_device="npu", kv_role="kv_producer",
-                           kv_parallel_size=1,
-                           kv_connector_module_path="vllm_ascend.distributed.llmdatadist_c_mgr_connector")
+    ktc = KVTransferConfig(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_producer",
+        kv_port="30000",
+        engine_id="0",
+        kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
+        kv_connector_extra_config={
+            "prefill": {
+                "dp_size": 1,
+                "tp_size": 1
+            },
+            "decode": {
+                "dp_size": 1,
+                "tp_size": 1
+            }
+        })
     # Set NPU memory utilization to 0.8
     llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
               kv_transfer_config=ktc,
@@ -79,10 +90,6 @@ def run_prefill(prefill_done, process_close):
 def run_decode(prefill_done):
-    os.environ['VLLM_ASCEND_LLMDD_RPC_PORT'] = '6634'
-    # ranktable.json needs be generated using gen_ranktable.sh
-    # from the examples/disaggregated_prefill_v1 module in the main branch.
-    os.environ['DISAGGREGATED_PREFILL_RANK_TABLE_PATH'] = "./ranktable.json"
     os.environ["ASCEND_RT_VISIBLE_DEVICES"] = "1"
     from vllm import LLM, SamplingParams
@@ -94,8 +101,22 @@ def run_decode(prefill_done):
     ]
     sampling_params = SamplingParams(temperature=0, top_p=0.95)
-    ktc = KVTransferConfig(kv_connector="LLMDataDistCMgrConnector", kv_buffer_device="npu", kv_role="kv_consumer",
-                           kv_parallel_size=1, kv_connector_module_path="vllm_ascend.distributed.llmdatadist_c_mgr_connector")
+    ktc = KVTransferConfig(
+        kv_connector="MooncakeConnector",
+        kv_role="kv_consumer",
+        kv_port="30100",
+        engine_id="1",
+        kv_connector_module_path="vllm_ascend.distributed.mooncake_connector",
+        kv_connector_extra_config={
+            "prefill": {
+                "dp_size": 1,
+                "tp_size": 1
+            },
+            "decode": {
+                "dp_size": 1,
+                "tp_size": 1
+            }
+        })
     llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
               kv_transfer_config=ktc,