[BugFix][PCP] Fix precision bugs for pcp/dcp in PD disaggregate (#6876)
### What this PR does / why we need it? Fix a bug in PD disaggregation with PCP/DCP: some conditions only considered MLA and ignored DSA. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: 15d76f74e2 - vLLM Ascend main: 81fb7d5779 Signed-off-by: tongyuzhou <tongyuzhou1@huawei.com> Co-authored-by: tongyuzhou <tongyuzhou1@huawei.com>
This commit is contained in:
@@ -1323,13 +1323,13 @@ class MooncakeConnectorWorker:
|
|||||||
|
|
||||||
def context_parallel_parameters_check():
|
def context_parallel_parameters_check():
|
||||||
assert (meta.remote_pcp_size * meta.remote_dcp_size) % (self.pcp_size * self.dcp_size) == 0
|
assert (meta.remote_pcp_size * meta.remote_dcp_size) % (self.pcp_size * self.dcp_size) == 0
|
||||||
if not self.use_mla:
|
if not (self.use_mla or self.use_sparse):
|
||||||
p_node_heads_per_rank = math.ceil(self.num_key_value_heads / prefill_tp_size)
|
p_node_heads_per_rank = math.ceil(self.num_key_value_heads / prefill_tp_size)
|
||||||
d_node_heads_per_rank = math.ceil(self.num_key_value_heads / self.tp_size)
|
d_node_heads_per_rank = math.ceil(self.num_key_value_heads / self.tp_size)
|
||||||
assert d_node_heads_per_rank % p_node_heads_per_rank == 0
|
assert d_node_heads_per_rank % p_node_heads_per_rank == 0
|
||||||
|
|
||||||
def get_kv_head_groups(tp_size):
|
def get_kv_head_groups(tp_size):
|
||||||
if self.use_mla:
|
if self.use_mla or self.use_sparse:
|
||||||
kv_head_groups = []
|
kv_head_groups = []
|
||||||
kv_head_ids = [0]
|
kv_head_ids = [0]
|
||||||
kv_head_groups.append(tuple(kv_head_ids))
|
kv_head_groups.append(tuple(kv_head_ids))
|
||||||
|
|||||||
Reference in New Issue
Block a user