Add DeepSeek V3 and Llama 4 support

This commit is contained in:
Chranos
2026-02-11 15:27:19 +08:00
parent 9b05d7285e
commit f6d6f69abc
2 changed files with 9 additions and 4 deletions

View File

@@ -1,5 +1,4 @@
"""Inference-only DeepSeek V3 Multi-Token Prediction (MTP) model.""" """Inference-only DeepSeek V3 Multi-Token Prediction (MTP) model."""
import re
from typing import Iterable, List, Optional, Tuple
import torch
@@ -18,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.sequence import IntermediateTensors
from .deepseek_v2 import DeepseekV2DecoderLayer
from .utils import maybe_prefix
class SharedHead(nn.Module):
@@ -240,6 +238,11 @@ class DeepSeekMTP(nn.Module):
and ".layers" not in name): and ".layers" not in name):
continue continue
# Strip "model." prefix since DeepSeekMTP holds
# embed_tokens and layers directly (no .model wrapper)
if name.startswith("model."):
name = name[len("model."):]
self._load_single_weight(
name, loaded_weight, stacked_params_mapping,
params_dict)

View File

@@ -159,9 +159,11 @@ class MLUSpecDecodeWorker(LoraNotSupportedWorkerBase):
draft_worker_kwargs[
"model_runner_cls"] = MLUTP1DraftModelRunner
else:
if draft_model_config.hf_config.model_type == "eagle": if draft_model_config.hf_config.model_type in (
"eagle", "deepseek_mtp"):
raise NotImplementedError(
"EAGLE does not support TP > 1 yet") f"{draft_model_config.hf_config.model_type} "
"does not support TP > 1 yet")
allow_zero_draft_token_step = False
proposer_worker = MLUMultiStepWorker(**draft_worker_kwargs)