Refactor MoE (#2575)

Co-authored-by: zhyncs <me@zhyncs.com>
This commit is contained in:
HandH1998
2024-12-26 00:02:14 +08:00
committed by GitHub
parent 8a56b43175
commit 53aed988cb
9 changed files with 1012 additions and 49 deletions

View File

@@ -94,7 +94,10 @@ class ModelConfig:
)
# FIXME: temporary special judge for MLA architecture
if "DeepseekV2ForCausalLM" in self.hf_config.architectures:
if (
"DeepseekV2ForCausalLM" in self.hf_config.architectures
or "DeepseekV3ForCausalLM" in self.hf_config.architectures
):
self.head_dim = 256
self.attention_arch = AttentionArch.MLA
self.kv_lora_rank = self.hf_config.kv_lora_rank