[v0.18.0][Bugfix] fix ds3.2 dcp mtp (#7681)
### What this PR does / why we need it?
Fixed an issue in the ds3.2 scenario where DCP overlaps with MTP.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Cherry-picked from: https://github.com/vllm-project/vllm-ascend/pull/7617

Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
@@ -168,6 +168,7 @@ class AscendMetadata:
|
||||
# should simplify these parameters once the attention schema in vLLM-Ascend
|
||||
# is unified.
|
||||
seq_lens: torch.Tensor = None
|
||||
seq_lens_cpu: torch.Tensor = None
|
||||
seq_lens_list: list[int] = None # type: ignore
|
||||
actual_seq_lengths_q: list[int] = None # type: ignore
|
||||
|
||||
@@ -307,6 +308,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
|
||||
block_tables=block_table,
|
||||
query_start_loc=query_start_loc,
|
||||
seq_lens=seq_lens,
|
||||
seq_lens_cpu=seq_lens,
|
||||
seq_lens_list=seq_lens.tolist(),
|
||||
max_query_len=common_attn_metadata.max_query_len,
|
||||
actual_seq_lengths_q=query_start_loc_cpu[1:].tolist(),
|
||||
|
||||
Reference in New Issue
Block a user