diff --git a/vllm_ascend/spec_decode/mtp_proposer.py b/vllm_ascend/spec_decode/mtp_proposer.py index 462fa33f..8bd45bbe 100644 --- a/vllm_ascend/spec_decode/mtp_proposer.py +++ b/vllm_ascend/spec_decode/mtp_proposer.py @@ -780,15 +780,16 @@ class MtpProposer(Proposer): hidden_states) for layer_name in self.attn_layer_name: - if self.use_async_scheduling and attn_metadata[ - layer_name].decode is not None: - actual_size = len(attn_metadata[layer_name].decode. - actual_seq_lengths_q) + decode_metadata = getattr(attn_metadata[layer_name], + "decode", None) + if self.use_async_scheduling and decode_metadata is not None: + actual_size = len( + decode_metadata.actual_seq_lengths_q) - attn_metadata[layer_name].decode.seq_lens_list = \ - attn_metadata[layer_name].decode.seq_lens_list[:actual_size] - attn_metadata[layer_name].decode.block_table = \ - attn_metadata[layer_name].decode.block_table[:actual_size] + decode_metadata.seq_lens_list = \ + decode_metadata.seq_lens_list[:actual_size] + decode_metadata.block_table = \ + decode_metadata.block_table[:actual_size] hidden_states = self.model(input_ids=input_ids, positions=positions,