From 18495f44b23754e7fafdf8f04f855e201e26aebe Mon Sep 17 00:00:00 2001 From: Angazenn <92204292+Angazenn@users.noreply.github.com> Date: Mon, 7 Jul 2025 20:03:02 +0800 Subject: [PATCH] [BugFix] Fix max_num_tokens_across_dp calculation bugs in attention_v1_torchair (#1636) ### What this PR does / why we need it? This PR fixes a bug caused by the max_num_tokens_across_dp calculation. In the earlier version, we computed this as graph_pad_size plus the actual max_num_tokens. This will result in a different max_num_tokens_across_dp across dp ranks. If padding is required, this might cause incorrect padding. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed normally. Signed-off-by: angazenn Co-authored-by: angazenn --- vllm_ascend/attention/attention_v1_torchair.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/attention/attention_v1_torchair.py b/vllm_ascend/attention/attention_v1_torchair.py index 46f1708..9d9b91b 100644 --- a/vllm_ascend/attention/attention_v1_torchair.py +++ b/vllm_ascend/attention/attention_v1_torchair.py @@ -273,10 +273,10 @@ class AscendAttentionTorchairMetadataBuilder: if use_torchair_graph and self.runner.attn_state in [ AscendAttentionState.DecodeOnly, ]: - max_num_tokens_across_dp += graph_pad_size pad_value = 1 padded_seq_lens = seq_lens.tolist() + [pad_value ] * graph_pad_size + max_num_tokens_across_dp = len(padded_seq_lens) seq_lens = torch.from_numpy( np.array(padded_seq_lens).astype(np.int32))