[Fix]: support deepseek-vl2-tiny model (#5552)
Co-authored-by: bppps <zouyu.zzx@alibaba-inc.com>
This commit is contained in:
@@ -162,7 +162,9 @@ class ModelConfig:
             self.attention_arch = AttentionArch.MLA
             self.kv_lora_rank = self.hf_config.kv_lora_rank
             self.qk_rope_head_dim = self.hf_config.qk_rope_head_dim
-        elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures:
+        elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures and getattr(
+            self.hf_text_config, "use_mla", True
+        ):
             self.head_dim = 256
             self.attention_arch = AttentionArch.MLA
             self.kv_lora_rank = self.hf_text_config.kv_lora_rank
Reference in New Issue
Block a user