[BugFix][PCP] Fix precision bugs for pcp/dcp in PD disaggregate (#6876)
### What this PR does / why we need it? Fix a bug for PD disaggregate of PCP/DCP: some conditions only consider MLA while ignoring DSA. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: 15d76f74e2 - vLLM Ascend main: 81fb7d5779 Signed-off-by: tongyuzhou <tongyuzhou1@huawei.com> Co-authored-by: tongyuzhou <tongyuzhou1@huawei.com>
This commit is contained in:
@@ -1323,13 +1323,13 @@ class MooncakeConnectorWorker:
|
||||
|
||||
def context_parallel_parameters_check():
|
||||
assert (meta.remote_pcp_size * meta.remote_dcp_size) % (self.pcp_size * self.dcp_size) == 0
|
||||
if not self.use_mla:
|
||||
if not (self.use_mla or self.use_sparse):
|
||||
p_node_heads_per_rank = math.ceil(self.num_key_value_heads / prefill_tp_size)
|
||||
d_node_heads_per_rank = math.ceil(self.num_key_value_heads / self.tp_size)
|
||||
assert d_node_heads_per_rank % p_node_heads_per_rank == 0
|
||||
|
||||
def get_kv_head_groups(tp_size):
|
||||
if self.use_mla:
|
||||
if self.use_mla or self.use_sparse:
|
||||
kv_head_groups = []
|
||||
kv_head_ids = [0]
|
||||
kv_head_groups.append(tuple(kv_head_ids))
|
||||
|
||||
Reference in New Issue
Block a user