From feab047084d5bb85637ed6595218cf659814f929 Mon Sep 17 00:00:00 2001 From: Qiu Date: Thu, 29 Jan 2026 19:35:52 +0800 Subject: [PATCH] [bugfix](pcp,gqa) set kv_inverse_idx_for_chunk and cp_kv_recover_idx_for_chunk to None when dcp only (#6317) ### What this PR does / why we need it? We only do restore and recover for pcp, so we should set `kv_inverse_idx_for_chunk` and `cp_kv_recover_idx_for_chunk` to `None` when only using dcp. - vLLM version: v0.14.1 - vLLM main: https://github.com/vllm-project/vllm/commit/dc917cceb877dfd13f98c538c4c96158047d98bd Signed-off-by: QiuChunshuo --- .../attention/context_parallel/attention_cp.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/vllm_ascend/attention/context_parallel/attention_cp.py b/vllm_ascend/attention/context_parallel/attention_cp.py index f97798f3..8bffb12f 100644 --- a/vllm_ascend/attention/context_parallel/attention_cp.py +++ b/vllm_ascend/attention/context_parallel/attention_cp.py @@ -157,10 +157,18 @@ class AscendAttentionCPMetadataBuilder(AscendAttentionMetadataBuilder): local_chunk_starts = torch.zeros( (len(local_context_lens_allranks),), dtype=torch.int32, device=self.device ) - kv_inverse_idx_for_chunk = torch.argsort( - common_long_seq_metadata.pcp_allgather_restore_idx[pcp_size * num_decode_tokens :].to(torch.float32) - ) - cp_kv_recover_idx_for_chunk = torch.argsort(kv_inverse_idx_for_chunk) + # Note(qcs): we only do restore and recover for pcp, and set these vars to None + # when only using dcp. + if self.pcp_size > 1: + kv_inverse_idx_for_chunk = torch.argsort( + common_long_seq_metadata.pcp_allgather_restore_idx[pcp_size * num_decode_tokens :].to( + torch.float32 + ) + ) + cp_kv_recover_idx_for_chunk = torch.argsort(kv_inverse_idx_for_chunk) + else: + kv_inverse_idx_for_chunk = None + cp_kv_recover_idx_for_chunk = None batch_chunk_seq_mask = local_context_lens_allranks[:, self.pcp_rank, self.dcp_rank] == 0 batch_chunk_seq_mask = torch.repeat_interleave(