[v0.18.0][Bugfix] fix ds3.2 dcp mtp (#7681)
### What this PR does / why we need it?
Fixes a bug in the ds3.2 scenario that occurs when DCP is enabled together with MTP.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Cherry-picked from: https://github.com/vllm-project/vllm-ascend/pull/7617

Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
@@ -600,7 +600,7 @@ class SpecDecodeBaseProposer(EagleProposer):
|
||||
- 1
|
||||
)
|
||||
num_accept_tokens = query_lens_d.to(self.device) - num_reject_tokens
|
||||
- ori_seq_len = attn_metadata_i.seq_lens[:batch_size].clone()
|
||||
+ ori_seq_len = attn_metadata_i.seq_lens_cpu[:batch_size].clone()
|
||||
mtp_slot_mapping = self.runner.pcp_manager.mtp_slot_pad
|
||||
|
||||
# slot_mapping index base offset:
|
||||
@@ -1247,7 +1247,8 @@ class SpecDecodeBaseProposer(EagleProposer):
|
||||
|
||||
if self.pcp_size * self.dcp_size > 1:
|
||||
if self.vllm_config.model_config.use_mla:
|
||||
- attn_metadata.decode.cp_seq_len = cp_seq_len
|
||||
+ if getattr(attn_metadata, "decode", None):
|
||||
+     attn_metadata.decode.cp_seq_len = cp_seq_len
|
||||
else:
|
||||
attn_metadata.decode_meta.num_computed_tokens_of_pcp_dcp = num_computed_tokens_of_pcp_dcp
|
||||
|
||||
|
||||
Reference in New Issue
Block a user