From e9bb4491ec9e6927e5ac24a47da75fefddc97805 Mon Sep 17 00:00:00 2001
From: whx <56632993+whx-sjtu@users.noreply.github.com>
Date: Tue, 4 Nov 2025 14:06:59 +0800
Subject: [PATCH] [BugFix] Fix deepseek v3.2 mtp bug. (#3900)

### What this PR does / why we need it?
This PR fixes deepseek v3.2 mtp bug.

### Does this PR introduce _any_ user-facing change?
None

### How was this patch tested?
All existing CI tests should pass.

- vLLM version: v0.11.0
- vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac

---------

Signed-off-by: whx-sjtu <2952154980@qq.com>
---
 vllm_ascend/spec_decode/mtp_proposer.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/vllm_ascend/spec_decode/mtp_proposer.py b/vllm_ascend/spec_decode/mtp_proposer.py
index c2a2a845..9f6d7874 100644
--- a/vllm_ascend/spec_decode/mtp_proposer.py
+++ b/vllm_ascend/spec_decode/mtp_proposer.py
@@ -13,6 +13,7 @@ from vllm.model_executor.model_loader import get_model_loader
 from vllm.model_executor.model_loader.utils import \
     process_weights_after_loading
 from vllm.model_executor.models.deepseek_mtp import DeepSeekMTP
+from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache
 from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
 from vllm.utils import cdiv
 from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
@@ -139,6 +140,9 @@ class MtpProposer(Proposer):
         target_attn_layer_names = set(
             get_layers_from_vllm_config(self.vllm_config,
                                         AttentionLayerBase).keys())
+        target_indexer_layer_names = set(
+            get_layers_from_vllm_config(self.vllm_config,
+                                        DeepseekV32IndexerCache).keys())
         draft_model_config = \
             self.vllm_config.speculative_config.draft_model_config
         target_device = self.vllm_config.device_config.device
@@ -152,6 +156,13 @@ class MtpProposer(Proposer):
         draft_attn_layer_names = (get_layers_from_vllm_config(
             self.vllm_config, AttentionLayerBase).keys() -
                                   target_attn_layer_names)
+        indexer_layers = get_layers_from_vllm_config(self.vllm_config,
+                                                     DeepseekV32IndexerCache)
+        draft_indexer_layer_names = indexer_layers.keys(
+        ) - target_indexer_layer_names
+        # NOTE: Currently we don't have specific attention backend and attention metadata
+        # for deepseek v3.2 indexer, so we just exclude the indexer layers here.
+        draft_attn_layer_names = draft_attn_layer_names - draft_indexer_layer_names
         assert len(draft_attn_layer_names) == 1
         self.attn_layer_name = list(draft_attn_layer_names)