forked from EngineX-Cambricon/enginex-mlu370-vllm
Add DeepSeek V3 MTP and Llama 4 model support
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
"""Inference-only DeepSeek V3 Multi-Token Prediction (MTP) model."""
|
||||
import re
|
||||
from typing import Iterable, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
@@ -18,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.sequence import IntermediateTensors
|
||||
|
||||
from .deepseek_v2 import DeepseekV2DecoderLayer
|
||||
from .utils import maybe_prefix
|
||||
|
||||
|
||||
class SharedHead(nn.Module):
|
||||
@@ -240,6 +238,11 @@ class DeepSeekMTP(nn.Module):
|
||||
and ".layers" not in name):
|
||||
continue
|
||||
|
||||
# Strip "model." prefix since DeepSeekMTP holds
|
||||
# embed_tokens and layers directly (no .model wrapper)
|
||||
if name.startswith("model."):
|
||||
name = name[len("model."):]
|
||||
|
||||
self._load_single_weight(
|
||||
name, loaded_weight, stacked_params_mapping,
|
||||
params_dict)
|
||||
|
||||
Reference in New Issue
Block a user