From 9180dd6c51b35c9f000e5e1fee1b892c227dca99 Mon Sep 17 00:00:00 2001
From: Yuzhou Tong <48299280+YzTongNiar@users.noreply.github.com>
Date: Mon, 2 Mar 2026 16:11:00 +0800
Subject: [PATCH] [BugFix][PCP] Fix precision bugs for pcp/dcp in PD
 disaggregation (#6876)

### What this PR does / why we need it?
Fix a bug in PD disaggregation with PCP/DCP: some conditions only consider
MLA while ignoring DSA.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/15d76f74e2fdb12a95ea00f0ca283acf6219a2b7
- vLLM Ascend main: 81fb7d57791c2fc69577047eab35718085feb324

Signed-off-by: tongyuzhou
Co-authored-by: tongyuzhou
---
 .../distributed/kv_transfer/kv_p2p/mooncake_connector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
index 02fd1a61..4c692e33 100644
--- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
+++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
@@ -1323,13 +1323,13 @@ class MooncakeConnectorWorker:
 
         def context_parallel_parameters_check():
             assert (meta.remote_pcp_size * meta.remote_dcp_size) % (self.pcp_size * self.dcp_size) == 0
-            if not self.use_mla:
+            if not (self.use_mla or self.use_sparse):
                 p_node_heads_per_rank = math.ceil(self.num_key_value_heads / prefill_tp_size)
                 d_node_heads_per_rank = math.ceil(self.num_key_value_heads / self.tp_size)
                 assert d_node_heads_per_rank % p_node_heads_per_rank == 0
 
         def get_kv_head_groups(tp_size):
-            if self.use_mla:
+            if self.use_mla or self.use_sparse:
                 kv_head_groups = []
                 kv_head_ids = [0]
                 kv_head_groups.append(tuple(kv_head_ids))
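
For reviewers unfamiliar with the grouping logic, here is a minimal,
self-contained sketch of the idea behind both changed conditions. It is an
illustration under assumptions, not the connector's actual implementation:
the standalone `get_kv_head_groups` signature below and the per-rank
replication behavior are simplified for the example. The point it shows is
that MLA and DSA both expose a single shared latent KV head, so the
GQA-style per-rank head split must be skipped for either attention type,
not only for MLA.

```python
import math

# Hypothetical standalone sketch -- not the code in mooncake_connector.py.
def get_kv_head_groups(tp_size: int, num_key_value_heads: int,
                       use_mla: bool, use_sparse: bool):
    """Map each tensor-parallel rank to the KV head IDs it owns."""
    if use_mla or use_sparse:
        # MLA and DSA keep one shared latent KV head, so every rank
        # refers to head 0. Before this patch, a DSA model wrongly
        # fell through to the GQA branch below.
        return [(0,)] * tp_size
    # Standard GQA/MHA: split the real KV heads evenly across ranks.
    heads_per_rank = math.ceil(num_key_value_heads / tp_size)
    return [
        tuple(range(r * heads_per_rank,
                    min((r + 1) * heads_per_rank, num_key_value_heads)))
        for r in range(tp_size)
    ]

print(get_kv_head_groups(4, 8, use_mla=False, use_sparse=False))
# GQA: [(0, 1), (2, 3), (4, 5), (6, 7)]
print(get_kv_head_groups(4, 8, use_mla=False, use_sparse=True))
# DSA after the fix: [(0,), (0,), (0,), (0,)]
```

The same reasoning applies to `context_parallel_parameters_check`: the
prefill/decode head-count divisibility assert only makes sense when the KV
heads are actually split across TP ranks, so it is skipped for MLA and,
with this fix, for DSA as well.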