[Fix]: support deepseek-vl2-tiny model (#5552)

Co-authored-by: bppps <zouyu.zzx@alibaba-inc.com>
This commit is contained in:
ZXN
2025-04-26 17:52:53 +08:00
committed by GitHub
parent feda9b11b3
commit 04d0123fd9
6 changed files with 80 additions and 6 deletions

View File

@@ -162,7 +162,9 @@ class ModelConfig:
self.attention_arch = AttentionArch.MLA
self.kv_lora_rank = self.hf_config.kv_lora_rank
self.qk_rope_head_dim = self.hf_config.qk_rope_head_dim
-elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures:
+elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures and getattr(
+self.hf_text_config, "use_mla", True
+):
self.head_dim = 256
self.attention_arch = AttentionArch.MLA
self.kv_lora_rank = self.hf_text_config.kv_lora_rank