Replace the Kimi-K2 generated tool call idx with history tool call count (#10612)

Co-authored-by: eraser00 <eraser00@github.com>
This commit is contained in:
eraser00
2025-09-26 09:47:40 +08:00
committed by GitHub
parent 7dcd689b47
commit 0ac6114694
2 changed files with 226 additions and 15 deletions

View File

@@ -420,6 +420,181 @@ class ServingChatTestCase(unittest.TestCase):
tool_calls = payload["choices"][0]["delta"]["tool_calls"]
self.assertEqual(tool_calls[0]["id"], "functions.get_weather:0")
def test_kimi_k2_non_streaming_tool_call_id_with_history(self):
    """Non-streaming kimi_k2 tool_call ids continue from the history count.

    The request history already contains one assistant tool call, so the
    two calls parsed from the new completion are expected to be numbered
    1 and 2 even though the (mocked) parser reports tool_index 0 and 1.
    """
    # Select the kimi_k2 parser for this test.
    self.chat.tool_call_parser = "kimi_k2"

    # One completed tool-call round trip, followed by a new user question.
    earlier_call = {
        "id": "functions.get_weather:0",
        "type": "function",
        "function": {
            "name": "get_weather",
            "arguments": '{"city": "Paris"}',
        },
    }
    conversation = [
        {"role": "user", "content": "What's the weather today in paris?"},
        {
            "role": "assistant",
            "content": "Let me do some search first.",
            "tool_calls": [earlier_call],
        },
        {
            "role": "tool",
            "content": "It's rainy in paris now.",
            "tool_call_id": "functions.get_weather:0",
        },
        {"role": "assistant", "content": "It's rainy now."},
        {"role": "user", "content": "What about LA and Tokyo?"},
    ]
    req = ChatCompletionRequest(
        model="x",
        messages=conversation,
        tools=[{"type": "function", "function": {"name": "get_weather"}}],
        stream=False,
    )

    # ToolCallItem-like stand-ins. Kimi-K2 series models may emit fixed
    # tool_index values that ignore the tool-call history; the indices
    # below (0 and 1) reproduce that situation.
    # `name` is assigned after construction because Mock(name=...) names
    # the mock itself instead of creating an attribute.
    la_call = Mock(parameters='{"city":"Loa Angeles"}', tool_index=0)
    la_call.name = "get_weather"
    tokyo_call = Mock(parameters='{"city":"Tokyo"}', tool_index=1)
    tokyo_call.name = "get_weather"

    # Replace FunctionCallParser so parse_non_stream yields both calls.
    with patch(
        "sglang.srt.entrypoints.openai.serving_chat.FunctionCallParser"
    ) as ParserMock:
        fake_parser = ParserMock.return_value
        fake_parser.has_tool_call.return_value = True
        fake_parser.parse_non_stream.return_value = ("", [la_call, tokyo_call])

        history_tool_calls_cnt = self.chat._get_history_tool_calls_cnt(req)
        tool_calls, _remaining_text, _ = self.chat._process_tool_calls(
            text="<|tool_calls_section_begin|>...",
            tools=[
                {"type": "function", "function": {"name": "get_weather"}},
            ],
            finish_reason={"type": "stop", "matched": None},
            history_tool_calls_cnt=history_tool_calls_cnt,
        )

    # Exactly one tool call exists in the prior assistant messages.
    self.assertEqual(history_tool_calls_cnt, 1)
    self.assertIsNotNone(tool_calls)
    self.assertEqual(len(tool_calls), 2)

    # Ids must be offset by the history count rather than restart at 0.
    expected_ids = ("functions.get_weather:1", "functions.get_weather:2")
    for produced, expected_id in zip(tool_calls, expected_ids):
        self.assertEqual(produced.id, expected_id)
        self.assertEqual(produced.function.name, "get_weather")
def test_kimi_k2_streaming_tool_call_id_with_history(self):
    """Streaming kimi_k2 tool_call ids continue from the history count.

    The request history already contains one assistant tool call, so the
    first streamed tool-call chunk is expected to carry the id
    ``functions.get_weather:1`` even though the (mocked) parser reports
    ``tool_index == 0``.
    """
    # Force kimi_k2 parser
    self.chat.tool_call_parser = "kimi_k2"

    # Prepare request with tool calls history
    req = ChatCompletionRequest(
        model="x",
        messages=[
            {"role": "user", "content": "What's the weather today in paris?"},
            {
                "role": "assistant",
                "content": "Let me do some search first.",
                "tool_calls": [
                    {
                        "id": "functions.get_weather:0",
                        "type": "function",
                        "function": {
                            "name": "get_weather",
                            "arguments": '{"city": "Paris"}',
                        },
                    }
                ],
            },
            {
                "role": "tool",
                "content": "It's rainy in paris now.",
                "tool_call_id": "functions.get_weather:0",
            },
            {
                "role": "assistant",
                "content": "It's rainy now.",
            },
            {
                "role": "user",
                "content": "What about LA?",
            },
        ],
        tools=[{"type": "function", "function": {"name": "get_weather"}}],
        stream=True,
    )

    # Patch FunctionCallParser used inside _process_tool_call_stream
    with patch(
        "sglang.srt.entrypoints.openai.serving_chat.FunctionCallParser"
    ) as ParserMock:
        parser_instance = ParserMock.return_value

        # First call returns one ToolCallItem-like chunk (with name).
        # Kimi-K2 series models might generate a fixed tool_index that
        # ignores the tool-call history and messes up all following calls.
        first_chunk_call = Mock()
        first_chunk_call.tool_index = 0
        first_chunk_call.name = "get_weather"
        first_chunk_call.parameters = ""
        parser_instance.parse_stream_chunk.side_effect = [
            ("", [first_chunk_call]),
            ("", []),
        ]

        async def collect_first_tool_chunk():
            """Return the first SSE line the stream emits, or None."""
            gen = self.chat._process_tool_call_stream(
                index=0,
                delta="irrelevant",
                parser_dict={},
                content={"meta_info": {"id": "chatcmpl-test"}},
                request=req,
                has_tool_calls={},
            )
            try:
                async for emitted in gen:
                    return emitted
                return None
            finally:
                # Close the generator explicitly instead of leaving a
                # suspended async generator for the loop/GC to finalize.
                await gen.aclose()

        # Use a private event loop: asyncio.get_event_loop() without a
        # running loop is deprecated and fails when no current loop is
        # set. A dedicated loop also leaves the thread's current loop
        # untouched for other tests.
        loop = asyncio.new_event_loop()
        try:
            line = loop.run_until_complete(collect_first_tool_chunk())
        finally:
            loop.close()

    self.assertIsNotNone(line)
    self.assertTrue(line.startswith("data: "))

    # Strip the SSE prefix and inspect the JSON payload.
    payload = json.loads(line[len("data: ") :])
    tool_calls = payload["choices"][0]["delta"]["tool_calls"]
    self.assertEqual(tool_calls[0]["id"], "functions.get_weather:1")
# Run this module's tests with verbose output when executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)