forked from EngineX-Cambricon/enginex-mlu370-vllm
add deepseekv3 and llama4
This commit is contained in:
@@ -1403,6 +1403,18 @@ class SpeculativeConfig:
+    draft_hf_config = draft_model_config.hf_config
+
+    # Detect DeepSeek V3 MTP: same model path with
+    # num_nextn_predict_layers > 0
+    num_nextn = getattr(draft_hf_config,
+                        "num_nextn_predict_layers", 0)
+    if (num_nextn and num_nextn > 0
+            and getattr(draft_hf_config, "model_type", "")
+            in ("deepseek_v3",)):
+        draft_hf_config.model_type = "deepseek_mtp"
+        draft_hf_config.architectures = ["DeepSeekMTPModel"]
+        if num_speculative_tokens is None:
+            num_speculative_tokens = num_nextn
+
     if (num_speculative_tokens is not None
             and hasattr(draft_hf_config, "num_lookahead_tokens")):
         draft_hf_config.num_lookahead_tokens = num_speculative_tokens
@@ -1421,7 +1433,7 @@ class SpeculativeConfig:
                 f"{num_speculative_tokens=} was provided.")

     if enable_chunked_prefill and draft_hf_config.model_type in (
-            "medusa", "mlp_speculator", "eagle"):
+            "medusa", "mlp_speculator", "eagle", "deepseek_mtp"):
         raise ValueError(
             "Chunked prefill and hidden-state based draft models are "
             "not compatible.")
Reference in New Issue
Block a user