[Bugfix] Remove redundant tensor creation and unused code (#656)

### What this PR does / why we need it?
Eliminated the duplicate `block_tables` tensor initialization and cleaned up
unused code segments. This resolves an issue where the second creation
overwrote the first, potentially leading to unexpected behavior.

Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com>
This commit is contained in:
Jade Zheng
2025-04-27 14:09:16 +08:00
committed by GitHub
parent ba3d8aae94
commit fa4a5d980e

View File

@@ -599,14 +599,6 @@ class AscendMetadataBuilder(CommonMetadataBuilder[AscendMetadata]):
max_query_len = max(query_lens) max_query_len = max(query_lens)
max_prefill_seq_len = max(self.prefill_seq_lens, default=0) max_prefill_seq_len = max(self.prefill_seq_lens, default=0)
max_decode_seq_len = max(self.curr_seq_lens, default=0) max_decode_seq_len = max(self.curr_seq_lens, default=0)
if self.num_prefills > 0:
self.attn_mask = AscendMetadataBuilder._attn_mask_builder.get_attn_mask( # type: ignore
max_prefill_seq_len,
self.input_builder.runner.model_config.dtype,
self.input_builder.runner.device)
else:
self.attn_mask = None
num_decode_tokens = self.num_decode_tokens num_decode_tokens = self.num_decode_tokens
if self.num_prefills == 0 and use_torchair_graph: if self.num_prefills == 0 and use_torchair_graph:
@@ -630,14 +622,6 @@ class AscendMetadataBuilder(CommonMetadataBuilder[AscendMetadata]):
self.input_builder.runner.device) self.input_builder.runner.device)
else: else:
self.attn_mask = None self.attn_mask = None
num_decode_tokens = self.num_decode_tokens
block_tables = make_tensor_with_pad(
self.block_tables,
pad=0,
dtype=torch.int32,
device=device,
)
assert max_query_len > 0, "query_lens: {}".format(query_lens) assert max_query_len > 0, "query_lens: {}".format(query_lens)