Add DeepSeek V3 and Llama 4
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
"""Inference-only DeepSeek V3 Multi-Token Prediction (MTP) model."""
|
"""Inference-only DeepSeek V3 Multi-Token Prediction (MTP) model."""
|
||||||
import re
|
|
||||||
from typing import Iterable, List, Optional, Tuple
|
from typing import Iterable, List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
@@ -18,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
|
|||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
|
|
||||||
from .deepseek_v2 import DeepseekV2DecoderLayer
|
from .deepseek_v2 import DeepseekV2DecoderLayer
|
||||||
from .utils import maybe_prefix
|
|
||||||
|
|
||||||
|
|
||||||
class SharedHead(nn.Module):
|
class SharedHead(nn.Module):
|
||||||
@@ -240,6 +238,11 @@ class DeepSeekMTP(nn.Module):
|
|||||||
and ".layers" not in name):
|
and ".layers" not in name):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Strip "model." prefix since DeepSeekMTP holds
|
||||||
|
# embed_tokens and layers directly (no .model wrapper)
|
||||||
|
if name.startswith("model."):
|
||||||
|
name = name[len("model."):]
|
||||||
|
|
||||||
self._load_single_weight(
|
self._load_single_weight(
|
||||||
name, loaded_weight, stacked_params_mapping,
|
name, loaded_weight, stacked_params_mapping,
|
||||||
params_dict)
|
params_dict)
|
||||||
|
|||||||
@@ -159,9 +159,11 @@ class MLUSpecDecodeWorker(LoraNotSupportedWorkerBase):
|
|||||||
draft_worker_kwargs[
|
draft_worker_kwargs[
|
||||||
"model_runner_cls"] = MLUTP1DraftModelRunner
|
"model_runner_cls"] = MLUTP1DraftModelRunner
|
||||||
else:
|
else:
|
||||||
if draft_model_config.hf_config.model_type == "eagle":
|
if draft_model_config.hf_config.model_type in (
|
||||||
|
"eagle", "deepseek_mtp"):
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"EAGLE does not support TP > 1 yet")
|
f"{draft_model_config.hf_config.model_type} "
|
||||||
|
"does not support TP > 1 yet")
|
||||||
|
|
||||||
allow_zero_draft_token_step = False
|
allow_zero_draft_token_step = False
|
||||||
proposer_worker = MLUMultiStepWorker(**draft_worker_kwargs)
|
proposer_worker = MLUMultiStepWorker(**draft_worker_kwargs)
|
||||||
|
|||||||
Reference in New Issue
Block a user