[Fix]: support deepseek-vl2-tiny model (#5552)
Co-authored-by: bppps <zouyu.zzx@alibaba-inc.com>
This commit is contained in:
@@ -182,7 +182,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
|
||||
tokenized_str, images, seq_mask, spatial_crop = self.tokenize_with_images(
|
||||
messages,
|
||||
pil_images[image_index : image_index + image_token_cnt],
|
||||
-bos=False,
|
||||
+bos=True,
|
||||
eos=True,
|
||||
cropping=len(pil_images) <= 2,
|
||||
max_req_input_len=max_req_input_len,
|
||||
|
||||
@@ -162,7 +162,9 @@ class ModelConfig:
|
||||
self.attention_arch = AttentionArch.MLA
|
||||
self.kv_lora_rank = self.hf_config.kv_lora_rank
|
||||
self.qk_rope_head_dim = self.hf_config.qk_rope_head_dim
|
||||
-elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures:
|
||||
+elif "DeepseekVL2ForCausalLM" in self.hf_config.architectures and getattr(
|
||||
+self.hf_text_config, "use_mla", True
|
||||
+):
|
||||
self.head_dim = 256
|
||||
self.attention_arch = AttentionArch.MLA
|
||||
self.kv_lora_rank = self.hf_text_config.kv_lora_rank
|
||||
|
||||
Reference in New Issue
Block a user