[Feat/WIP] Add LLaVA-OneVision, with support for (1) SigLIP encoder, (2) Qwen2 decoder, and (3) OpenAI API-compatible server. (#1123)
Co-authored-by: Bo Li <drluodian@gmail.com>
This commit is contained in:
committed by
GitHub
parent
5fafcac008
commit
a5b14ad043
@@ -34,6 +34,7 @@ class SeparatorStyle(IntEnum):
|
||||
NO_COLON_TWO = auto()
|
||||
ADD_NEW_LINE_SINGLE = auto()
|
||||
LLAMA2 = auto()
|
||||
LLAMA3 = auto()
|
||||
CHATGLM = auto()
|
||||
CHATML = auto()
|
||||
CHATINTERN = auto()
|
||||
@@ -137,6 +138,20 @@ class Conversation:
|
||||
else:
|
||||
ret += role + ":"
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.LLAMA3:
|
||||
ret = "<|begin_of_text|>"
|
||||
if self.system_message:
|
||||
ret += system_prompt
|
||||
else:
|
||||
ret += ""
|
||||
for i, (role, message) in enumerate(self.messages):
|
||||
if message:
|
||||
ret += f"<|start_header_id|>{role}<|end_header_id|>\n\n"
|
||||
ret += f"{message.strip()}<|eot_id|>"
|
||||
else:
|
||||
ret += f"<|start_header_id|>{role}<|end_header_id|>\n\n"
|
||||
# print(ret)
|
||||
return ret
|
||||
elif self.sep_style == SeparatorStyle.LLAMA2:
|
||||
seps = [self.sep, self.sep2]
|
||||
if self.system_message:
|
||||
@@ -379,12 +394,23 @@ def generate_chat_conv(
|
||||
conv.append_message(conv.roles[0], message.content)
|
||||
else:
|
||||
real_content = ""
|
||||
# calculate number of image_url
|
||||
num_image_url = 0
|
||||
for content in message.content:
|
||||
if content.type == "image_url":
|
||||
num_image_url += 1
|
||||
if num_image_url > 1:
|
||||
image_token = "<image>"
|
||||
else:
|
||||
image_token = "<image>\n"
|
||||
for content in message.content:
|
||||
if content.type == "text":
|
||||
if num_image_url > 16:
|
||||
real_content += "\n" # for video
|
||||
real_content += content.text
|
||||
elif content.type == "image_url":
|
||||
# NOTE: Only works for llava
|
||||
real_content += "<image>\n"
|
||||
real_content += image_token
|
||||
conv.append_image(content.image_url.url)
|
||||
conv.append_message(conv.roles[0], real_content)
|
||||
elif msg_role == "assistant":
|
||||
@@ -425,6 +451,18 @@ register_conv_template(
|
||||
)
|
||||
)
|
||||
|
||||
register_conv_template(
|
||||
Conversation(
|
||||
name="chatml-llava",
|
||||
system_template="<|im_start|>system\n{system_message}",
|
||||
system_message="You are a helpful assistant.",
|
||||
roles=("<|im_start|>user", "<|im_start|>assistant"),
|
||||
sep_style=SeparatorStyle.CHATML,
|
||||
sep="<|im_end|>",
|
||||
stop_str=["<|endoftext|>", "<|im_end|>"],
|
||||
)
|
||||
)
|
||||
|
||||
register_conv_template(
|
||||
Conversation(
|
||||
name="vicuna_v1.1",
|
||||
@@ -437,6 +475,17 @@ register_conv_template(
|
||||
)
|
||||
)
|
||||
|
||||
register_conv_template(
|
||||
Conversation(
|
||||
name="llava_llama_3",
|
||||
system_message="You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.",
|
||||
system_template="<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
|
||||
roles=("user", "assistant"),
|
||||
sep_style=SeparatorStyle.LLAMA3,
|
||||
sep="",
|
||||
stop_str=["<|end_of_text|>", "<|eot_id|>"],
|
||||
)
|
||||
)
|
||||
# Reference: https://github.com/InternLM/lmdeploy/blob/387bf54b4f124e72aab30ae9755f562e435d3d01/lmdeploy/model.py#L425-L442
|
||||
register_conv_template(
|
||||
Conversation(
|
||||
|
||||
Reference in New Issue
Block a user