Support glm4.1v and glm4.5v (#8798)

Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: zRzRzRzRzRzRzR <2448370773@qq.com>
Co-authored-by: Minglei Zhu <mingleizhu1122@gmail.com>
Co-authored-by: Chang Su <csu272@usc.edu>
This commit is contained in:
Binyao Jiang
2025-08-09 00:59:13 -07:00
committed by GitHub
parent faa25df1ae
commit f29aba8c6e
21 changed files with 1584 additions and 19 deletions

View File

@@ -948,5 +948,6 @@ class TestOpenAIPythonicFunctionCalling(CustomTestCase):
# def test_function_calling_multiturn(self):
# self._test_function_calling_multiturn()
# Run this module's tests via unittest when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

View File

@@ -497,6 +497,17 @@ class TestEBNFGeneration(unittest.TestCase):
},
),
),
Tool(
type="function",
function=Function(
name="empty_param_func",
description="Function with empty parameters",
parameters={
"properties": {},
"required": [],
},
),
),
]
self.tokenizer = get_tokenizer(DEFAULT_SMALL_MODEL_NAME_FOR_TEST)
@@ -630,16 +641,21 @@ class TestEBNFGeneration(unittest.TestCase):
self.assertIsNotNone(ebnf)
# Check that the EBNF contains expected patterns for XML format
self.assertIn('"<tool_call>" function_call "</tool_call>"', ebnf)
self.assertIn('"get_weather" "\\n" arguments_get_weather', ebnf)
self.assertIn('"get_weather" "\\n" ( arguments_get_weather "\\n" )?', ebnf)
self.assertIn(
'"<arg_key>location</arg_key>" "\\n" "<arg_value>" xml_text "</arg_value>" ( "\\n" ( "<arg_key>unit</arg_key>" "\\n" "<arg_value>" ("celsius" | "fahrenheit") "</arg_value>" ) )?',
ebnf,
)
self.assertIn('"search" "\\n" arguments_search', ebnf)
self.assertIn('"search" "\\n" ( arguments_search "\\n" )?', ebnf)
self.assertIn(
'"<arg_key>query</arg_key>" "\\n" "<arg_value>" xml_text "</arg_value>"',
ebnf,
)
self.assertIn(
'"empty_param_func" "\\n" ( arguments_empty_param_func "\\n" )?', ebnf
)
self.assertIn('arguments_empty_param_func ::= ""', ebnf)
# Validate that the EBNF can be compiled by GrammarCompiler
try:
ctx = self.grammar_compiler.compile_grammar(ebnf)

View File

@@ -60,6 +60,86 @@ class TestTemplateContentFormatDetection(CustomTestCase):
result = detect_jinja_template_content_format("")
self.assertEqual(result, "string")
def test_detect_msg_content_pattern(self):
    """A template that iterates over ``msg.content`` items is classified as 'openai'."""
    # The loop variable is named ``msg``; the user branch walks the items of
    # ``msg.content`` whenever the content is not a plain string.
    template = """
[gMASK]<sop>
{%- for msg in messages %}
{%- if msg.role == 'system' %}
<|system|>
{{ msg.content }}
{%- elif msg.role == 'user' %}
<|user|>{{ '\n' }}
{%- if msg.content is string %}
{{ msg.content }}
{%- else %}
{%- for item in msg.content %}
{%- if item.type == 'video' or 'video' in item %}
<|begin_of_video|><|video|><|end_of_video|>
{%- elif item.type == 'image' or 'image' in item %}
<|begin_of_image|><|image|><|end_of_image|>
{%- elif item.type == 'text' %}
{{ item.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- elif msg.role == 'assistant' %}
{%- if msg.metadata %}
<|assistant|>{{ msg.metadata }}
{{ msg.content }}
{%- else %}
<|assistant|>
{{ msg.content }}
{%- endif %}
{%- endif %}
{%- endfor %}
{% if add_generation_prompt %}<|assistant|>
{% endif %}
"""
    detected_format = detect_jinja_template_content_format(template)
    self.assertEqual(detected_format, "openai")
def test_detect_m_content_pattern(self):
    """A template that iterates over ``m.content`` items is classified as 'openai'."""
    # Same template shape as the ``msg`` variant, but the loop variable is the
    # shorter name ``m``; the expected detection result is unchanged.
    m_content_template = """
[gMASK]<sop>
{%- for m in messages %}
{%- if m.role == 'system' %}
<|system|>
{{ m.content }}
{%- elif m.role == 'user' %}
<|user|>{{ '\n' }}
{%- if m.content is string %}
{{ m.content }}
{%- else %}
{%- for item in m.content %}
{%- if item.type == 'video' or 'video' in item %}
<|begin_of_video|><|video|><|end_of_video|>
{%- elif item.type == 'image' or 'image' in item %}
<|begin_of_image|><|image|><|end_of_image|>
{%- elif item.type == 'text' %}
{{ item.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- elif m.role == 'assistant' %}
{%- if m.metadata %}
<|assistant|>{{ m.metadata }}
{{ m.content }}
{%- else %}
<|assistant|>
{{ m.content }}
{%- endif %}
{%- endif %}
{%- endfor %}
{% if add_generation_prompt %}<|assistant|>
{% endif %}
"""
    detected_format = detect_jinja_template_content_format(m_content_template)
    self.assertEqual(detected_format, "openai")
def test_process_content_openai_format(self):
"""Test content processing for openai format."""
msg_dict = {

View File

@@ -348,6 +348,33 @@ class TestVILAServer(TestOpenAIVisionServer):
cls.base_url += "/v1"
# Skipped in CI tests
# class TestGLM41VServer(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "zai-org/GLM-4.1V-9B-Thinking"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--trust-remote-code",
# "--mem-fraction-static",
# "0.68",
# "--cuda-graph-max-bs",
# "4",
# "--reasoning-parser",
# "glm45",
# ],
# )
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# self._test_video_chat_completion()
# Script entry point: run this module's tests via unittest.
if __name__ == "__main__":
    # Remove the base-class name from the module namespace before running.
    # NOTE(review): presumably so unittest.main() does not collect and run the
    # shared TestOpenAIVisionServer base class on its own — confirm.
    del TestOpenAIVisionServer
    unittest.main()

View File

@@ -96,8 +96,13 @@ class TestOpenAIVisionServer(CustomTestCase):
), f"text: {text}, should contain cab, taxi, SUV, vehicle or car"
# MiniCPMO fails to recognize `iron`, but `hanging`
assert (
"iron" in text or "hang" in text or "cloth" in text or "holding" in text
), f"text: {text}, should contain iron, hang, cloth or holding"
"iron" in text
or "hang" in text
or "cloth" in text
or "coat" in text
or "holding" in text
or "outfit" in text
), f"text: {text}, should contain iron, hang, cloth, coat or holding or outfit"
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
@@ -193,11 +198,15 @@ class TestOpenAIVisionServer(CustomTestCase):
print(f"Multi images response:\n{text}")
print("-" * 30)
assert (
"man" in text or "cab" in text or "SUV" in text or "taxi" in text
), f"text: {text}, should contain man, cab, SUV or taxi"
"man" in text
or "cab" in text
or "SUV" in text
or "taxi" in text
or "car" in text
), f"text: {text}, should contain man, cab, SUV, taxi or car"
assert (
"logo" in text or '"S"' in text or "SG" in text
), f"text: {text}, should contain logo, S or SG"
"logo" in text or '"S"' in text or "SG" in text or "graphic" in text
), f"text: {text}, should contain logo, S or SG or graphic"
assert response.id
assert response.created
assert response.usage.prompt_tokens > 0
@@ -320,11 +329,12 @@ class TestOpenAIVisionServer(CustomTestCase):
or "individual" in video_response
or "speaker" in video_response
or "Steve" in video_response
or "hand" in video_response
), f"""
====================== video_response =====================
{video_response}
===========================================================
should contain 'man' or 'person' or 'individual' or 'speaker'
should contain 'man' or 'person' or 'individual' or 'speaker' or 'hand'
"""
assert (
"present" in video_response
@@ -375,7 +385,8 @@ class TestOpenAIVisionServer(CustomTestCase):
or "person" in video_response
or "individual" in video_response
or "speaker" in video_response
), f"video_response: {video_response}, should either have 'man' in video_response, or 'person' in video_response, or 'individual' in video_response or 'speaker' in video_response"
or "hand" in video_response
), f"video_response: {video_response}, should either have 'man' in video_response, or 'person' in video_response, or 'individual' in video_response, or 'speaker' in video_response or 'hand' in video_response"
assert (
"present" in video_response
or "examine" in video_response