From 18495f44b23754e7fafdf8f04f855e201e26aebe Mon Sep 17 00:00:00 2001 From: Angazenn <92204292+Angazenn@users.noreply.github.com> Date: Mon, 7 Jul 2025 20:03:02 +0800 Subject: [PATCH] [BugFix] Fix max_num_tokens_across_dp calculation bugs in attention_v1_torchair (#1636) ### What this PR does / why we need it? This PR fixes a bug caused by the max_num_tokens_across_dp calculation. In the earlier version, we computed this as graph_pad_size plus the actual max_num_tokens. This will result in a different max_num_tokens_across_dp across dp ranks. If padding is required, this might cause incorrect padding. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed normally. Signed-off-by: angazenn Co-authored-by: angazenn --- vllm_ascend/attention/attention_v1_torchair.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/attention/attention_v1_torchair.py b/vllm_ascend/attention/attention_v1_torchair.py index 46f1708..9d9b91b 100644 --- a/vllm_ascend/attention/attention_v1_torchair.py +++ b/vllm_ascend/attention/attention_v1_torchair.py @@ -273,10 +273,10 @@ class AscendAttentionTorchairMetadataBuilder: if use_torchair_graph and self.runner.attn_state in [ AscendAttentionState.DecodeOnly, ]: - max_num_tokens_across_dp += graph_pad_size pad_value = 1 padded_seq_lens = seq_lens.tolist() + [pad_value ] * graph_pad_size + max_num_tokens_across_dp = len(padded_seq_lens) seq_lens = torch.from_numpy( np.array(padded_seq_lens).astype(np.int32))