[v0.18.0][Bugfix] fix ds3.2 dcp mtp (#7681)

### What this PR does / why we need it?
Fixes an issue that occurs when DCP is enabled together with MTP in the
ds3.2 scenario.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

cherry-pick from: https://github.com/vllm-project/vllm-ascend/pull/7617

Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
weiguihua2
2026-03-27 14:24:53 +08:00
committed by GitHub
parent 048c8d1afe
commit bc8e87f3db
10 changed files with 18 additions and 7 deletions

View File

@@ -600,7 +600,7 @@ class SpecDecodeBaseProposer(EagleProposer):
- 1
)
num_accept_tokens = query_lens_d.to(self.device) - num_reject_tokens
ori_seq_len = attn_metadata_i.seq_lens[:batch_size].clone()
ori_seq_len = attn_metadata_i.seq_lens_cpu[:batch_size].clone()
mtp_slot_mapping = self.runner.pcp_manager.mtp_slot_pad
# slot_mapping index base offset:
@@ -1247,7 +1247,8 @@ class SpecDecodeBaseProposer(EagleProposer):
if self.pcp_size * self.dcp_size > 1:
if self.vllm_config.model_config.use_mla:
attn_metadata.decode.cp_seq_len = cp_seq_len
if getattr(attn_metadata, "decode", None):
attn_metadata.decode.cp_seq_len = cp_seq_len
else:
attn_metadata.decode_meta.num_computed_tokens_of_pcp_dcp = num_computed_tokens_of_pcp_dcp