feat(attention_cp): support chunked prefill for Qwen3Next with PCP&DCP (#6900)

### What this PR does / why we need it?
Support chunked prefill for Qwen3Next with PCP&DCP

- vLLM version: v0.16.0
- vLLM main:
15d76f74e2

---------

Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
This commit is contained in:
Qiu
2026-03-09 17:55:09 +08:00
committed by GitHub
parent a76a509fae
commit 13adcbe44b
6 changed files with 63 additions and 63 deletions

View File

@@ -78,11 +78,11 @@ class AscendMetadataForPrefill:
local_context_lens_allranks: list[list[int]] | None = None
cp_kv_recover_idx_for_chunk: list[int] | None = None
kv_inverse_idx_for_chunk: list[int] | None = None
batch_chunk_seq_mask: list[bool] | None = None
local_total_toks: int | None = None
""" Prefill Specific Metadata for Ascend"""
pcp_metadata: AscendPCPMetadata | None = None
pcp_exit_fa_scatter_idx: torch.Tensor | None = None
chunked_context: ChunkedContextMetadata | None = None
block_tables: torch.Tensor = None
actual_seq_lengths_q: torch.Tensor = None