feat(attention_cp): support chunked prefill for Qwen3Next with PCP&DCP (#6900)
### What this PR does / why we need it?
Add chunked-prefill support for Qwen3Next when running with PCP and DCP.
- vLLM version: v0.16.0
- vLLM main: 15d76f74e2
---------
Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
This commit is contained in:
@@ -78,11 +78,11 @@ class AscendMetadataForPrefill:
|
||||
local_context_lens_allranks: list[list[int]] | None = None
|
||||
cp_kv_recover_idx_for_chunk: list[int] | None = None
|
||||
kv_inverse_idx_for_chunk: list[int] | None = None
|
||||
batch_chunk_seq_mask: list[bool] | None = None
|
||||
local_total_toks: int | None = None
|
||||
|
||||
""" Prefill Specific Metadata for Ascend"""
|
||||
pcp_metadata: AscendPCPMetadata | None = None
|
||||
pcp_exit_fa_scatter_idx: torch.Tensor | None = None
|
||||
chunked_context: ChunkedContextMetadata | None = None
|
||||
block_tables: torch.Tensor = None
|
||||
actual_seq_lengths_q: torch.Tensor = None
|
||||
|
||||
Reference in New Issue
Block a user