forked from EngineX-Cambricon/enginex-mlu370-vllm
add deepseekv3 and llama4
This commit is contained in:
@@ -1403,6 +1403,18 @@ class SpeculativeConfig:
+    draft_hf_config = draft_model_config.hf_config
+
+    # Detect DeepSeek V3 MTP: same model path with
+    # num_nextn_predict_layers > 0
+    num_nextn = getattr(draft_hf_config,
+                        "num_nextn_predict_layers", 0)
+    if (num_nextn and num_nextn > 0
+            and getattr(draft_hf_config, "model_type", "")
+            in ("deepseek_v3",)):
+        draft_hf_config.model_type = "deepseek_mtp"
+        draft_hf_config.architectures = ["DeepSeekMTPModel"]
+        if num_speculative_tokens is None:
+            num_speculative_tokens = num_nextn
+
     if (num_speculative_tokens is not None
             and hasattr(draft_hf_config, "num_lookahead_tokens")):
         draft_hf_config.num_lookahead_tokens = num_speculative_tokens
@@ -1421,7 +1433,7 @@ class SpeculativeConfig:
                 f"{num_speculative_tokens=} was provided.")

     if enable_chunked_prefill and draft_hf_config.model_type in (
-            "medusa", "mlp_speculator", "eagle"):
+            "medusa", "mlp_speculator", "eagle", "deepseek_mtp"):
         raise ValueError(
             "Chunked prefill and hidden-state based draft models are "
             "not compatible.")
Reference in New Issue
Block a user