[Fix]: support deepseek-vl2-tiny model (#5552)

Co-authored-by: bppps <zouyu.zzx@alibaba-inc.com>
This commit is contained in:
ZXN
2025-04-26 17:52:53 +08:00
committed by GitHub
parent feda9b11b3
commit 04d0123fd9
6 changed files with 80 additions and 6 deletions

View File

@@ -182,7 +182,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
tokenized_str, images, seq_mask, spatial_crop = self.tokenize_with_images(
messages,
pil_images[image_index : image_index + image_token_cnt],
-                bos=False,
+                bos=True,
eos=True,
cropping=len(pil_images) <= 2,
max_req_input_len=max_req_input_len,

View File

@@ -162,7 +162,9 @@ class ModelConfig:
self.attention_arch = AttentionArch.MLA
self.kv_lora_rank = self.hf_config.kv_lora_rank
self.qk_rope_head_dim = self.hf_config.qk_rope_head_dim
-        elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures:
+        elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures and getattr(
+            self.hf_text_config, "use_mla", True
+        ):
self.head_dim = 256
self.attention_arch = AttentionArch.MLA
self.kv_lora_rank = self.hf_text_config.kv_lora_rank