[P/D][main] Retire the llmdatadist-connector-related code and files. (#4780)
### What this PR does / why we need it?
Now that the mooncake connector is supported, the llmdatadist connector is no
longer maintained, so the llmdatadist-related files are being retired.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
By CI.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
Signed-off-by: liziyu <liziyu16@huawei.com>
Co-authored-by: liziyu <liziyu16@huawei.com>
```diff
@@ -20,11 +20,6 @@ from vllm.distributed.kv_transfer.kv_connector.factory import \
 
 def register_connector():
-    KVConnectorFactory.register_connector(
-        "LLMDataDistCMgrConnector",
-        "vllm_ascend.distributed.llmdatadist_c_mgr_connector",
-        "LLMDataDistCMgrConnector")
-
     KVConnectorFactory.register_connector(
         "MooncakeConnectorV1", "vllm_ascend.distributed.mooncake_connector",
         "MooncakeConnector")
 
```
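With the llmdatadist registration removed, `MooncakeConnectorV1` is the connector name that remains registered here. As a minimal sketch of how a registered connector name is selected on the user side, assuming vLLM's `KVTransferConfig` with `kv_connector`/`kv_role` fields (check the exact field names in your vLLM version; the model name is illustrative):

```python
from vllm import LLM
from vllm.config import KVTransferConfig

# Hypothetical prefill-side configuration; the decode side would use
# kv_role="kv_consumer" instead.
ktc = KVTransferConfig(
    kv_connector="MooncakeConnectorV1",  # name registered by register_connector()
    kv_role="kv_producer",
)
llm = LLM(model="Qwen/Qwen2.5-7B-Instruct", kv_transfer_config=ktc)
```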
(One file's diff is suppressed because it is too large.)
```diff
@@ -103,23 +103,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
     "VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION":
     lambda: bool(
         int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION", '1'))),
-    # `LLMDataDistCMgrConnector` required variable. `DISAGGREGATED_PREFILL_RANK_TABLE_PATH` is
-    # used for llmdatadist to build the communication topology for kv cache transfer, it is
-    # a required variable if `LLMDataDistCMgrConnector` is used as kv connector for disaggregated
-    # pd. The rank table can be generated by adopting the script `gen_ranktable.sh`
-    # in vllm_ascend's example folder.
-    "DISAGGREGATED_PREFILL_RANK_TABLE_PATH":
-    lambda: os.getenv("DISAGGREGATED_PREFILL_RANK_TABLE_PATH", None),
-    # `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_IP` is used as the
-    # rpc communication listening ip, which will be used to receive the agent metadata from the
-    # remote worker.
-    "VLLM_ASCEND_LLMDD_RPC_IP":
-    lambda: os.getenv("VLLM_ASCEND_LLMDD_RPC_IP", "0.0.0.0"),
-    # `LLMDataDistCMgrConnector` required variable. `VLLM_ASCEND_LLMDD_RPC_PORT` is used as the
-    # rpc communication listening port, which will be used to receive the agent metadata from the
-    # remote worker.
-    "VLLM_ASCEND_LLMDD_RPC_PORT":
-    lambda: int(os.getenv("VLLM_ASCEND_LLMDD_RPC_PORT", 5557)),
     # Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible
     # and the mla_pa will be the default path of deepseek decode path.
     "VLLM_ASCEND_MLA_PA":
```
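The removed entries followed the lazy-lookup pattern used throughout this `env_variables` dict: each value is a zero-argument callable, so the environment is read at access time rather than at import time. A self-contained sketch of that pattern (the variable name below is illustrative, not part of vllm-ascend after this change):

```python
import os
from typing import Any, Callable, Dict

# Minimal sketch of the lazy env-var registry pattern shown above.
env_variables: Dict[str, Callable[[], Any]] = {
    "MY_EXAMPLE_RPC_PORT":
    lambda: int(os.getenv("MY_EXAMPLE_RPC_PORT", 5557)),
}

def resolve(name: str) -> Any:
    # Calling the stored lambda reads the environment lazily.
    return env_variables[name]()

print(resolve("MY_EXAMPLE_RPC_PORT"))  # 5557 unless overridden in the environment
```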
```diff
@@ -3398,7 +3398,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
         # init kv cache tensors
         kv_cache_raw_tensors: dict[str, Union[torch.Tensor,
                                               Optional[torch.Tensor]]] = {}
-        # llmdatadist need the addr of cache tensor be aligned with 2M
+        # prefill disaggregation need the addr of cache tensor be aligned with 2M
         alignment = 2 * 1024 * 1024
         for kv_cache_tensor in kv_cache_config.kv_cache_tensors:
             # TODO: REFACTOR ME to sharing hybrid cache
```
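Only the comment changes here; the 2 MiB address alignment itself stays, now attributed to prefill disaggregation in general rather than to llmdatadist specifically. For reference, the usual arithmetic for padding a buffer size up to that alignment looks like the following sketch (`raw_size` is an illustrative value, not taken from `kv_cache_config`):

```python
# Sketch of rounding a KV cache tensor size up to a 2 MiB boundary.
alignment = 2 * 1024 * 1024  # 2 MiB

def round_up(size: int, alignment: int) -> int:
    # Smallest multiple of `alignment` that is >= size.
    return (size + alignment - 1) // alignment * alignment

raw_size = 3_000_000
padded = round_up(raw_size, alignment)
assert padded % alignment == 0 and padded >= raw_size  # padded == 4_194_304
```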
```diff
@@ -3426,7 +3426,7 @@ class NPUModelRunner(LoRAModelRunnerMixin, ECConnectorModelRunnerMixin):
             elif "attn" in layer_name and layer_name not in kv_cache_raw_tensors.keys(
             ):
                 # NOTE: We need to init k cache tensor (nope cache tensor in mla) and
-                # v cache tensor (rope cache tensor in mla) separately to support llmdatadist,
+                # v cache tensor (rope cache tensor in mla) separately to support prefill disaggregation,
                 # as it only support the 0-dim of kv_cache is `num_blocks`.
                 # For deepseek mla, we need to spilt cache tensor accrodding to the nope head dim
                 # and rope head dim.
```
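Again only the comment's wording changes. The split it describes keeps `num_blocks` as dim 0 of each cache while separating the nope and rope portions along the last dimension; a rough, self-contained illustration of that idea (shapes and dims are hypothetical, not the actual NPUModelRunner code):

```python
import torch

# Illustrative only: split one raw MLA cache buffer into a "nope" cache and a
# "rope" cache, each keeping num_blocks as its leading dimension.
num_blocks, block_size = 4, 128   # hypothetical values
nope_dim, rope_dim = 512, 64      # hypothetical MLA head dims

raw = torch.zeros(num_blocks, block_size, nope_dim + rope_dim, dtype=torch.bfloat16)
nope_cache, rope_cache = raw.split([nope_dim, rope_dim], dim=-1)

assert nope_cache.shape[0] == rope_cache.shape[0] == num_blocks
```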