[Feature] Support minicpmv v2.6 (#2785)

Co-authored-by: Chayenne <zhaochen20@outlook.com>
Co-authored-by: yizhang2077 <1109276519@qq.com>
This commit is contained in:
Mick
2025-01-19 06:14:19 +08:00
committed by GitHub
parent c2f212d672
commit 3d93f84a00
20 changed files with 1715 additions and 139 deletions

View File

@@ -88,7 +88,6 @@ register_chat_template(
)
)
register_chat_template(
ChatTemplate(
name="claude",
@@ -101,7 +100,6 @@ register_chat_template(
)
)
register_chat_template(
ChatTemplate(
name="chatml",
@@ -116,7 +114,6 @@ register_chat_template(
)
)
register_chat_template(
ChatTemplate(
name="chatml-llava",
@@ -132,7 +129,6 @@ register_chat_template(
)
)
# There is default system prompt for qwen
# reference: https://modelscope.cn/models/qwen/Qwen2-72B-Instruct/file/view/master?fileName=tokenizer_config.json&status=1
# The chat template is: "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
@@ -219,6 +215,21 @@ register_chat_template(
)
)
# Chat template for the MiniCPM-V 2.6 vision-language model.
# Reference: https://huggingface.co/openbmb/MiniCPM-V-2_6
register_chat_template(
ChatTemplate(
name="minicpmv",
# No default system prompt is set for this template.
default_system_prompt=None,
# Maps each role to the (prefix, suffix) strings wrapped around that
# role's message text when the prompt is assembled.
role_prefix_and_suffix={
"system": ("", " "),
"user": ("user:", " "),
"assistant": ("assistant:", "</s>"),
},
# Strings that terminate generation when emitted by the model.
stop_str=("<|im_end|>", "<|endoftext|>"),
# Placeholder inserted into the prompt where image content goes.
# NOTE(review): format presumably follows the MiniCPM-V processor's
# expected image tag — confirm against the model's tokenizer/processor config.
image_token="(<image>./</image>)",
)
)
# The difference between "llama-3-instruct-llava" and "llama-3-instruct" is that llava uses a different image_token.
register_chat_template(
ChatTemplate(