From b399117e89b323c846186e4c618a764253bc827e Mon Sep 17 00:00:00 2001
From: weiguihua2 <weiguihua2@huawei.com>
Date: Wed, 21 Jan 2026 08:49:05 +0800
Subject: [PATCH] [Bugfix] fix pcp qwen full graph FIA bug (#6037)

### What this PR does / why we need it?
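In the PCP full-graph scenario with Qwen models, the Q shape passed to the
FIA operator was inconsistent with the actual Q sequence lengths. This patch
fixes that by using `attn_metadata.actual_seq_lengths_q` directly instead of
slicing it to the decode tokens and padding it up to `runtime_shape`.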

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060

Signed-off-by: weiguihua2 <weiguihua2@huawei.com>
---
 vllm_ascend/compilation/acl_graph.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/vllm_ascend/compilation/acl_graph.py b/vllm_ascend/compilation/acl_graph.py
index 28eafed5..ea979fbf 100644
--- a/vllm_ascend/compilation/acl_graph.py
+++ b/vllm_ascend/compilation/acl_graph.py
@@ -440,11 +440,8 @@ def update_attn_dcp_pcp_params(update_stream, forward_context, runtime_shape):
                 pad_tensor = np.zeros(pad_length, dtype=actual_seq_lengths_kv.dtype)
                 actual_seq_lengths_kv = np.concatenate([actual_seq_lengths_kv, pad_tensor])
 
-            actual_seq_lengths_q = attn_metadata.actual_seq_lengths_q[: attn_metadata.num_decode_tokens]
-            if runtime_shape - len(actual_seq_lengths_q):
-                actual_seq_lengths_q = actual_seq_lengths_q + [actual_seq_lengths_q[-1]] * (
-                    runtime_shape - len(actual_seq_lengths_q)
-                )
+            actual_seq_lengths_q = attn_metadata.actual_seq_lengths_q
+
             if dcp_size > 1:
                 num_heads = num_heads * dcp_size