[v0.18.0][Bugfix] fix ds3.2 dcp mtp (#7681)
### What this PR does / why we need it? Fixed the issue where the DCP overlaps the MTP scenario in the ds3.2 scenario. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? cherry-pick from: https://github.com/vllm-project/vllm-ascend/pull/7617 Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
@@ -174,6 +174,7 @@ class AscendMLAMetadata:
|
||||
slot_mapping: torch.Tensor
|
||||
query_start_loc: torch.Tensor
|
||||
seq_lens: torch.Tensor
|
||||
seq_lens_cpu: torch.Tensor
|
||||
block_tables: torch.Tensor
|
||||
|
||||
# New for MLA (compared to FlashAttention)
|
||||
@@ -457,6 +458,7 @@ class AscendMLAMetadataBuilder(MLACommonMetadataBuilder[AscendMLAMetadata]):
|
||||
query_start_loc=query_start_loc,
|
||||
block_tables=self.block_table,
|
||||
seq_lens=self.seq_lens,
|
||||
seq_lens_cpu=self.seq_lens,
|
||||
)
|
||||
|
||||
def build_chunked_metadata(
|
||||
|
||||
Reference in New Issue
Block a user