Support glm4.1v and glm4.5v (#8798)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: zRzRzRzRzRzRzR <2448370773@qq.com>
Co-authored-by: Minglei Zhu <mingleizhu1122@gmail.com>
Co-authored-by: Chang Su <csu272@usc.edu>
This commit is contained in:
@@ -948,5 +948,6 @@ class TestOpenAIPythonicFunctionCalling(CustomTestCase):
|
||||
# def test_function_calling_multiturn(self):
|
||||
# self._test_function_calling_multiturn()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -497,6 +497,17 @@ class TestEBNFGeneration(unittest.TestCase):
|
||||
},
|
||||
),
|
||||
),
|
||||
Tool(
|
||||
type="function",
|
||||
function=Function(
|
||||
name="empty_param_func",
|
||||
description="Function with empty parameters",
|
||||
parameters={
|
||||
"properties": {},
|
||||
"required": [],
|
||||
},
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
self.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
|
||||
@@ -630,16 +641,21 @@ class TestEBNFGeneration(unittest.TestCase):
|
||||
self.assertIsNotNone(ebnf)
|
||||
# Check that the EBNF contains expected patterns for XML format
|
||||
self.assertIn('"<tool_call>" function_call "</tool_call>"', ebnf)
|
||||
self.assertIn('"get_weather" "\\n" arguments_get_weather', ebnf)
|
||||
self.assertIn('"get_weather" "\\n" ( arguments_get_weather "\\n" )?', ebnf)
|
||||
self.assertIn(
|
||||
'"<arg_key>location</arg_key>" "\\n" "<arg_value>" xml_text "</arg_value>" ( "\\n" ( "<arg_key>unit</arg_key>" "\\n" "<arg_value>" ("celsius" | "fahrenheit") "</arg_value>" ) )?',
|
||||
ebnf,
|
||||
)
|
||||
self.assertIn('"search" "\\n" arguments_search', ebnf)
|
||||
self.assertIn('"search" "\\n" ( arguments_search "\\n" )?', ebnf)
|
||||
self.assertIn(
|
||||
'"<arg_key>query</arg_key>" "\\n" "<arg_value>" xml_text "</arg_value>"',
|
||||
ebnf,
|
||||
)
|
||||
self.assertIn(
|
||||
'"empty_param_func" "\\n" ( arguments_empty_param_func "\\n" )?', ebnf
|
||||
)
|
||||
self.assertIn('arguments_empty_param_func ::= ""', ebnf)
|
||||
|
||||
# Validate that the EBNF can be compiled by GrammarCompiler
|
||||
try:
|
||||
ctx = self.grammar_compiler.compile_grammar(ebnf)
|
||||
|
||||
@@ -60,6 +60,86 @@ class TestTemplateContentFormatDetection(CustomTestCase):
|
||||
result = detect_jinja_template_content_format("")
|
||||
self.assertEqual(result, "string")
|
||||
|
||||
def test_detect_msg_content_pattern(self):
|
||||
"""Test detection of template with msg.content pattern (should be 'openai' format)."""
|
||||
msg_content_pattern = """
|
||||
[gMASK]<sop>
|
||||
{%- for msg in messages %}
|
||||
{%- if msg.role == 'system' %}
|
||||
<|system|>
|
||||
{{ msg.content }}
|
||||
{%- elif msg.role == 'user' %}
|
||||
<|user|>{{ '\n' }}
|
||||
{%- if msg.content is string %}
|
||||
{{ msg.content }}
|
||||
{%- else %}
|
||||
{%- for item in msg.content %}
|
||||
{%- if item.type == 'video' or 'video' in item %}
|
||||
<|begin_of_video|><|video|><|end_of_video|>
|
||||
{%- elif item.type == 'image' or 'image' in item %}
|
||||
<|begin_of_image|><|image|><|end_of_image|>
|
||||
{%- elif item.type == 'text' %}
|
||||
{{ item.text }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- elif msg.role == 'assistant' %}
|
||||
{%- if msg.metadata %}
|
||||
<|assistant|>{{ msg.metadata }}
|
||||
{{ msg.content }}
|
||||
{%- else %}
|
||||
<|assistant|>
|
||||
{{ msg.content }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{% if add_generation_prompt %}<|assistant|>
|
||||
{% endif %}
|
||||
"""
|
||||
|
||||
result = detect_jinja_template_content_format(msg_content_pattern)
|
||||
self.assertEqual(result, "openai")
|
||||
|
||||
def test_detect_m_content_pattern(self):
|
||||
"""Test detection of template with m.content pattern (should be 'openai' format)."""
|
||||
msg_content_pattern = """
|
||||
[gMASK]<sop>
|
||||
{%- for m in messages %}
|
||||
{%- if m.role == 'system' %}
|
||||
<|system|>
|
||||
{{ m.content }}
|
||||
{%- elif m.role == 'user' %}
|
||||
<|user|>{{ '\n' }}
|
||||
{%- if m.content is string %}
|
||||
{{ m.content }}
|
||||
{%- else %}
|
||||
{%- for item in m.content %}
|
||||
{%- if item.type == 'video' or 'video' in item %}
|
||||
<|begin_of_video|><|video|><|end_of_video|>
|
||||
{%- elif item.type == 'image' or 'image' in item %}
|
||||
<|begin_of_image|><|image|><|end_of_image|>
|
||||
{%- elif item.type == 'text' %}
|
||||
{{ item.text }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- elif m.role == 'assistant' %}
|
||||
{%- if m.metadata %}
|
||||
<|assistant|>{{ m.metadata }}
|
||||
{{ m.content }}
|
||||
{%- else %}
|
||||
<|assistant|>
|
||||
{{ m.content }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{% if add_generation_prompt %}<|assistant|>
|
||||
{% endif %}
|
||||
"""
|
||||
|
||||
result = detect_jinja_template_content_format(msg_content_pattern)
|
||||
self.assertEqual(result, "openai")
|
||||
|
||||
def test_process_content_openai_format(self):
|
||||
"""Test content processing for openai format."""
|
||||
msg_dict = {
|
||||
|
||||
@@ -348,6 +348,33 @@ class TestVILAServer(TestOpenAIVisionServer):
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
# Skip for ci test
|
||||
# class TestGLM41VServer(TestOpenAIVisionServer):
|
||||
# @classmethod
|
||||
# def setUpClass(cls):
|
||||
# cls.model = "zai-org/GLM-4.1V-9B-Thinking"
|
||||
# cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
# cls.api_key = "sk-123456"
|
||||
# cls.process = popen_launch_server(
|
||||
# cls.model,
|
||||
# cls.base_url,
|
||||
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
# other_args=[
|
||||
# "--trust-remote-code",
|
||||
# "--mem-fraction-static",
|
||||
# "0.68",
|
||||
# "--cuda-graph-max-bs",
|
||||
# "4",
|
||||
# "--reasoning-parser",
|
||||
# "glm45",
|
||||
# ],
|
||||
# )
|
||||
# cls.base_url += "/v1"
|
||||
|
||||
# def test_video_chat_completion(self):
|
||||
# self._test_video_chat_completion()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
del TestOpenAIVisionServer
|
||||
unittest.main()
|
||||
|
||||
@@ -96,8 +96,13 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
), f"text: {text}, should contain cab, taxi, SUV, vehicle or car"
|
||||
# MiniCPMO fails to recognize `iron`, but `hanging`
|
||||
assert (
|
||||
"iron" in text or "hang" in text or "cloth" in text or "holding" in text
|
||||
), f"text: {text}, should contain iron, hang, cloth or holding"
|
||||
"iron" in text
|
||||
or "hang" in text
|
||||
or "cloth" in text
|
||||
or "coat" in text
|
||||
or "holding" in text
|
||||
or "outfit" in text
|
||||
), f"text: {text}, should contain iron, hang, cloth, coat or holding or outfit"
|
||||
assert response.id
|
||||
assert response.created
|
||||
assert response.usage.prompt_tokens > 0
|
||||
@@ -193,11 +198,15 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
print(f"Multi images response:\n{text}")
|
||||
print("-" * 30)
|
||||
assert (
|
||||
"man" in text or "cab" in text or "SUV" in text or "taxi" in text
|
||||
), f"text: {text}, should contain man, cab, SUV or taxi"
|
||||
"man" in text
|
||||
or "cab" in text
|
||||
or "SUV" in text
|
||||
or "taxi" in text
|
||||
or "car" in text
|
||||
), f"text: {text}, should contain man, cab, SUV, taxi or car"
|
||||
assert (
|
||||
"logo" in text or '"S"' in text or "SG" in text
|
||||
), f"text: {text}, should contain logo, S or SG"
|
||||
"logo" in text or '"S"' in text or "SG" in text or "graphic" in text
|
||||
), f"text: {text}, should contain logo, S or SG or graphic"
|
||||
assert response.id
|
||||
assert response.created
|
||||
assert response.usage.prompt_tokens > 0
|
||||
@@ -320,11 +329,12 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
or "individual" in video_response
|
||||
or "speaker" in video_response
|
||||
or "Steve" in video_response
|
||||
or "hand" in video_response
|
||||
), f"""
|
||||
====================== video_response =====================
|
||||
{video_response}
|
||||
===========================================================
|
||||
should contain 'man' or 'person' or 'individual' or 'speaker'
|
||||
should contain 'man' or 'person' or 'individual' or 'speaker' or 'hand'
|
||||
"""
|
||||
assert (
|
||||
"present" in video_response
|
||||
@@ -375,7 +385,8 @@ class TestOpenAIVisionServer(CustomTestCase):
|
||||
or "person" in video_response
|
||||
or "individual" in video_response
|
||||
or "speaker" in video_response
|
||||
), f"video_response: {video_response}, should either have 'man' in video_response, or 'person' in video_response, or 'individual' in video_response or 'speaker' in video_response"
|
||||
or "hand" in video_response
|
||||
), f"video_response: {video_response}, should either have 'man' in video_response, or 'person' in video_response, or 'individual' in video_response, or 'speaker' in video_response or 'hand' in video_response"
|
||||
assert (
|
||||
"present" in video_response
|
||||
or "examine" in video_response
|
||||
|
||||
Reference in New Issue
Block a user