GLM-4.5 Model Support (#8224)

Co-authored-by: Lifu Huang <lifu.hlf@gmail.com>
Co-authored-by: Binyao Jiang <byjiang1996@gmail.com>
Co-authored-by: Stefan He <hebiaobuaa@gmail.com>
This commit is contained in:
Yuxuan Zhang
2025-07-28 13:54:07 +08:00
committed by GitHub
parent 2fd5c7049f
commit 6d6a8bc278
14 changed files with 1673 additions and 7 deletions

View File

@@ -43,6 +43,7 @@ class TestEnableThinking(CustomTestCase):
"qwen3",
],
)
cls.additional_chat_kwargs = {}
@classmethod
def tearDownClass(cls):
@@ -59,6 +60,7 @@ class TestEnableThinking(CustomTestCase):
"temperature": 0,
"separate_reasoning": True,
"chat_template_kwargs": {"enable_thinking": True},
**self.additional_chat_kwargs,
},
)
@@ -82,6 +84,7 @@ class TestEnableThinking(CustomTestCase):
"temperature": 0,
"separate_reasoning": True,
"chat_template_kwargs": {"enable_thinking": False},
**self.additional_chat_kwargs,
},
)
@@ -107,6 +110,7 @@ class TestEnableThinking(CustomTestCase):
"separate_reasoning": True,
"stream": True,
"chat_template_kwargs": {"enable_thinking": True},
**self.additional_chat_kwargs,
},
stream=True,
)
@@ -151,6 +155,7 @@ class TestEnableThinking(CustomTestCase):
"separate_reasoning": True,
"stream": True,
"chat_template_kwargs": {"enable_thinking": False},
**self.additional_chat_kwargs,
},
stream=True,
)
@@ -184,5 +189,55 @@ class TestEnableThinking(CustomTestCase):
)
## Skipped in CI: the test below launches the full GLM-4.5 model with --tp-size 8.
# class TestGLM45EnableThinking(TestEnableThinking):
# @classmethod
# def setUpClass(cls):
# # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
# cls.model = "THUDM/GLM-4.5"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-1234"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# api_key=cls.api_key,
# other_args=[
# "--tool-call-parser",
# "glm45",
# "--reasoning-parser",
# "glm45",
# "--tp-size",
# "8"
# ],
# )
# # Validate that enable_thinking does not conflict with tool calls.
# cls.additional_chat_kwargs = {
# "tools": [
# {
# "type": "function",
# "function": {
# "name": "add",
# "description": "Compute the sum of two numbers",
# "parameters": {
# "type": "object",
# "properties": {
# "a": {
# "type": "int",
# "description": "A number",
# },
# "b": {
# "type": "int",
# "description": "A number",
# },
# },
# "required": ["a", "b"],
# },
# },
# }
# ]
# }
if __name__ == "__main__":
unittest.main()

View File

@@ -223,7 +223,10 @@ class TestOpenAIServerFunctionCalling(CustomTestCase):
messages = [
{"role": "system", "content": self.SYSTEM_MESSAGE},
{"role": "user", "content": "What is the temperature in Paris?"},
{
"role": "user",
"content": "What is the temperature in Paris in celsius??",
},
]
response_stream = client.chat.completions.create(
@@ -910,5 +913,40 @@ class TestOpenAIPythonicFunctionCalling(CustomTestCase):
)
## Skipped in CI: the test below launches the full GLM-4.5 model with --tp-size 8.
# class TestGLM45ServerFunctionCalling(TestOpenAIServerFunctionCalling):
# @classmethod
# def setUpClass(cls):
# # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
# cls.model = "THUDM/GLM-4.5"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# # Start the local OpenAI Server. If necessary, you can add other parameters such as --enable-tools.
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# api_key=cls.api_key,
# other_args=[
# # If your server needs extra parameters to test function calling, please add them here.
# "--tool-call-parser",
# "glm45",
# "--reasoning-parser",
# "glm45",
# "--tp-size",
# "8"
# ],
# )
# cls.base_url += "/v1"
# cls.tokenizer = get_tokenizer(cls.model)
# # This test is too difficult for GLM4-moe, so it is overridden as a no-op in the unit tests.
# def test_function_call_required(self):
# pass
# def test_function_calling_multiturn(self):
# self._test_function_calling_multiturn()
if __name__ == "__main__":
unittest.main()