From fa4a5d980e8845a88b9162cf169f0a5ab230f8a5 Mon Sep 17 00:00:00 2001 From: Jade Zheng Date: Sun, 27 Apr 2025 14:09:16 +0800 Subject: [PATCH] [Bugfix] Remove redundant tensor creation and unused code (#656) ### What this PR does / why we need it? Eliminated duplicate `block_tables` tensor initialization and cleaned up unused code segments. This resolves an issue where the second creation was overwriting the first, potentially leading to unexpected behavior. Signed-off-by: Jade Zheng --- vllm_ascend/attention/attention.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/vllm_ascend/attention/attention.py b/vllm_ascend/attention/attention.py index 2e0262c..8e5a1ba 100644 --- a/vllm_ascend/attention/attention.py +++ b/vllm_ascend/attention/attention.py @@ -599,14 +599,6 @@ class AscendMetadataBuilder(CommonMetadataBuilder[AscendMetadata]): max_query_len = max(query_lens) max_prefill_seq_len = max(self.prefill_seq_lens, default=0) max_decode_seq_len = max(self.curr_seq_lens, default=0) - - if self.num_prefills > 0: - self.attn_mask = AscendMetadataBuilder._attn_mask_builder.get_attn_mask( # type: ignore - max_prefill_seq_len, - self.input_builder.runner.model_config.dtype, - self.input_builder.runner.device) - else: - self.attn_mask = None num_decode_tokens = self.num_decode_tokens if self.num_prefills == 0 and use_torchair_graph: @@ -630,14 +622,6 @@ class AscendMetadataBuilder(CommonMetadataBuilder[AscendMetadata]): self.input_builder.runner.device) else: self.attn_mask = None - num_decode_tokens = self.num_decode_tokens - - block_tables = make_tensor_with_pad( - self.block_tables, - pad=0, - dtype=torch.int32, - device=device, - ) assert max_query_len > 0, "query_lens: {}".format(query_lens)