From cbc987db0bfcea976f9ade026a8c0879b1d787f4 Mon Sep 17 00:00:00 2001 From: Feng Liu <46866849+ader47@users.noreply.github.com> Date: Wed, 7 Jan 2026 10:01:27 +0800 Subject: [PATCH] [bugfix (pcp)] fix chunked prefill accuracy issue (#5647) ### What this PR does / why we need it? Purpose: initialize padded slot mapping buffer to prevent garbage values. In PCP mode, the `pcp_padded_slot_mapping` buffer is reused across invocations. Without explicit initialization, this buffer retains stale values from previous runs, which can lead to incorrect results. This change ensures the buffer is filled with -1 before the unpadded slot mapping is written into it. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d --------- Signed-off-by: F.Liu Co-authored-by: F.Liu --- vllm_ascend/worker/pcp_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm_ascend/worker/pcp_utils.py b/vllm_ascend/worker/pcp_utils.py index dc746b8b..cded9f54 100644 --- a/vllm_ascend/worker/pcp_utils.py +++ b/vllm_ascend/worker/pcp_utils.py @@ -319,6 +319,7 @@ class PCPManager: pcp_world_size] cp_unpad_mask = self.pcp_unpad_mask_cpu_tensor[:num_tokens * self.pcp_world_size] + pcp_padded_slot_mapping.fill_(-1) pcp_padded_slot_mapping[cp_unpad_mask] = slot_mapping return pcp_padded_slot_mapping