diff --git a/vllm-v0.6.2/vllm/model_executor/models/deepseek_mtp.py b/vllm-v0.6.2/vllm/model_executor/models/deepseek_mtp.py index 50e4ad9..da77eed 100644 --- a/vllm-v0.6.2/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm-v0.6.2/vllm/model_executor/models/deepseek_mtp.py @@ -1,5 +1,4 @@ """Inference-only DeepSeek V3 Multi-Token Prediction (MTP) model.""" -import re from typing import Iterable, List, Optional, Tuple import torch @@ -18,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .deepseek_v2 import DeepseekV2DecoderLayer -from .utils import maybe_prefix class SharedHead(nn.Module): @@ -240,6 +238,11 @@ class DeepSeekMTP(nn.Module): and ".layers" not in name): continue + # Strip "model." prefix since DeepSeekMTP holds + # embed_tokens and layers directly (no .model wrapper) + if name.startswith("model."): + name = name[len("model."):] + self._load_single_weight( name, loaded_weight, stacked_params_mapping, params_dict) diff --git a/vllm-v0.6.2/vllm/spec_decode/mlu_spec_decode_worker.py b/vllm-v0.6.2/vllm/spec_decode/mlu_spec_decode_worker.py index 6605343..b05dd95 100644 --- a/vllm-v0.6.2/vllm/spec_decode/mlu_spec_decode_worker.py +++ b/vllm-v0.6.2/vllm/spec_decode/mlu_spec_decode_worker.py @@ -159,9 +159,11 @@ class MLUSpecDecodeWorker(LoraNotSupportedWorkerBase): draft_worker_kwargs[ "model_runner_cls"] = MLUTP1DraftModelRunner else: - if draft_model_config.hf_config.model_type == "eagle": + if draft_model_config.hf_config.model_type in ( + "eagle", "deepseek_mtp"): raise NotImplementedError( - "EAGLE does not support TP > 1 yet") + f"{draft_model_config.hf_config.model_type} " + "does not support TP > 1 yet") allow_zero_draft_token_step = False proposer_worker = MLUMultiStepWorker(**draft_worker_kwargs)