Support NextN (MTP) speculative decoding for DeepSeek-V3/R1 (#3582)

This commit is contained in:
Ke Bao
2025-02-15 05:28:34 +08:00
committed by GitHub
parent fb4c9c3a30
commit 862dd76c76
7 changed files with 437 additions and 7 deletions

View File

@@ -98,6 +98,7 @@ class ModelConfig:
if (
"DeepseekV2ForCausalLM" in self.hf_config.architectures
or "DeepseekV3ForCausalLM" in self.hf_config.architectures
or "DeepseekV3ForCausalLMNextN" in self.hf_config.architectures
):
self.head_dim = 256
self.attention_arch = AttentionArch.MLA