[v0.18.0][Bugfix] fix ds3.2 dcp mtp (#7681)
### What this PR does / why we need it?

Fixes an issue that occurs when DCP is used together with MTP in the ds3.2 scenario.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Cherry-picked from: https://github.com/vllm-project/vllm-ascend/pull/7617

Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
@@ -130,6 +130,7 @@ class AscendSFAMetadata:
|
||||
num_actual_tokens: int # Number of tokens excluding padding.
|
||||
slot_mapping: torch.Tensor
|
||||
seq_lens: torch.Tensor
|
||||
seq_lens_cpu: torch.Tensor
|
||||
cum_query_lens: torch.Tensor
|
||||
block_table: torch.Tensor
|
||||
sin: torch.Tensor
|
||||
@@ -233,6 +234,7 @@ class AscendSFAMetadataBuilder(MLACommonMetadataBuilder[AscendSFAMetadata]):
|
||||
|
||||
cum_query_lens = common_attn_metadata.query_start_loc[1 : num_reqs + 1]
|
||||
seq_lens = common_attn_metadata.seq_lens[:num_reqs]
|
||||
seq_lens_cpu = common_attn_metadata.seq_lens_cpu[:num_reqs]
|
||||
|
||||
cos, sin = get_cos_and_sin_mla(input_positions, True)
|
||||
|
||||
@@ -320,6 +322,7 @@ class AscendSFAMetadataBuilder(MLACommonMetadataBuilder[AscendSFAMetadata]):
|
||||
num_actual_tokens=num_actual_tokens,
|
||||
cum_query_lens=cum_query_lens,
|
||||
seq_lens=seq_lens,
|
||||
seq_lens_cpu=seq_lens_cpu,
|
||||
slot_mapping=slot_mapping,
|
||||
head_dim=self.model_config.get_head_size(),
|
||||
attn_mask=self.attn_mask_builder.get_attention_mask(self.model_config),
|
||||
|
||||
Reference in New Issue
Block a user