[v0.18.0][Bugfix] fix ds3.2 dcp mtp (#7681)
### What this PR does / why we need it?

Fixes an issue that occurs when DCP is combined with MTP in the ds3.2 scenario.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Cherry-picked from: https://github.com/vllm-project/vllm-ascend/pull/7617

Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
This commit is contained in:
@@ -182,7 +182,7 @@ class TestAscendMLAMetadata(TestBase):
|
||||
|
||||
metadata = AscendMLAMetadata(
|
||||
num_actual_tokens_pcp_padded, num_actual_tokens, slot_mapping,
|
||||
query_start_loc, seq_lens, block_tables, num_decodes,
|
||||
query_start_loc, seq_lens, seq_lens, block_tables, num_decodes,
|
||||
num_decode_tokens, num_prefills, num_input_tokens, query_lens,
|
||||
head_dim, attn_mask, attn_state, decode, prefill)
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ class TestAscendSFAMetadata(TestBase):
|
||||
num_actual_tokens=num_actual_tokens,
|
||||
slot_mapping=slot_mapping,
|
||||
seq_lens=seq_lens,
|
||||
seq_lens_cpu=seq_lens,
|
||||
cum_query_lens=cum_query_lens,
|
||||
block_table=block_table,
|
||||
sin=sin,
|
||||
|
||||
@@ -803,6 +803,7 @@ class TestPCPDCPGraphParams(TestBase):
|
||||
slot_mapping,
|
||||
query_start_loc,
|
||||
seq_lens,
|
||||
seq_lens,
|
||||
block_tables,
|
||||
4,
|
||||
4,
|
||||
|
||||
Reference in New Issue
Block a user