[Cleanup] Remove unused attn_metadata parameter from Proposer classes (#4862)

The `attn_metadata` is not used by any draft proposer, so we can remove
it.


- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

---------

Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com>
This commit is contained in:
Jade Zheng
2025-12-15 21:21:38 +08:00
committed by GitHub
parent a9625851ef
commit c064d11fd7
7 changed files with 1 addition and 21 deletions

View File

@@ -51,10 +51,6 @@ _MTP_MODELS = {
("vllm.model_executor.models.qwen3_next_mtp", "Qwen3NextMTP")
}
_DEFAULT_FIRST_LAYER = 'model.layers.0.self_attn.attn'
_FIRST_LAYERS = {"Qwen3NextForCausalLM": 'model.layers.3.self_attn.attn'}
def _load_model(architecture):
if architecture not in _MTP_MODELS:
@@ -345,10 +341,8 @@ class MtpProposer(Proposer):
positions: torch.Tensor = None,
num_scheduled_tokens: int = 0,
hidden_states: torch.Tensor = None,
attn_metadata=None,
aux_hidden_states: torch.Tensor = None):
common_attn_metadata = self.runner.spec_decode_common_attn_metadata
attn_metadata = self._get_attn_metadata(attn_metadata)
if self.speculative_config.disable_padded_drafter_batch:
# When padded-batch is disabled, the sampled_token_ids should be
@@ -487,14 +481,6 @@ class MtpProposer(Proposer):
model = _load_model(architecture)
self.model = model(vllm_config=self.vllm_config).to(target_device)
def _get_attn_metadata(self, attn_metadata):
    """Resolve the attention metadata entry for this model's first attention layer.

    If *attn_metadata* is a dict keyed by layer name, pick the key for the
    current architecture from ``_FIRST_LAYERS`` (falling back to
    ``_DEFAULT_FIRST_LAYER``) and return that single entry. Any non-dict
    value (including ``None``) is returned unchanged.
    """
    # isinstance(None, dict) is False, so non-dict and None both pass through.
    if not isinstance(attn_metadata, dict):
        return attn_metadata
    arch = self.vllm_config.model_config.architecture
    layer_key = _FIRST_LAYERS.get(arch, _DEFAULT_FIRST_LAYER)
    return attn_metadata[layer_key]
def _prepare_inputs(
self,
common_attn_metadata: CommonAttentionMetadata,