diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index e69587432..9889cb2ed 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -484,7 +484,10 @@ class OpenAIServingChat(OpenAIServingBase): # Handle tool calls if request.tool_choice != "none" and request.tools: - async for chunk in self._process_tool_call_stream( + async for ( + chunk, + tool_call_finish_reason_type, + ) in self._process_tool_call_stream( index, delta, parser_dict, @@ -492,7 +495,10 @@ class OpenAIServingChat(OpenAIServingBase): request, finish_reason_type, ): - yield chunk + if chunk: + yield chunk + finish_reason_type = tool_call_finish_reason_type + else: # Regular content if delta or not ( @@ -865,7 +871,7 @@ class OpenAIServingChat(OpenAIServingBase): choices=[choice_data], model=request.model, ) - yield f"data: {chunk.model_dump_json()}\n\n" + yield f"data: {chunk.model_dump_json()}\n\n", finish_reason_type # Yield tool calls for call_item in calls: @@ -920,4 +926,7 @@ class OpenAIServingChat(OpenAIServingBase): choices=[choice_data], model=request.model, ) - yield f"data: {chunk.model_dump_json()}\n\n" + yield f"data: {chunk.model_dump_json()}\n\n", finish_reason_type + + if finish_reason_type == "stop": + yield None, "tool_calls" diff --git a/test/srt/openai_server/function_call/test_openai_function_calling.py b/test/srt/openai_server/function_call/test_openai_function_calling.py index 012fc15c5..8b437a8ac 100644 --- a/test/srt/openai_server/function_call/test_openai_function_calling.py +++ b/test/srt/openai_server/function_call/test_openai_function_calling.py @@ -159,6 +159,13 @@ class TestOpenAIServerFunctionCalling(CustomTestCase): "Target function name 'get_current_weather' was not found in the streaming chunks", ) + finish_reason = chunks[-1].choices[0].finish_reason + self.assertEqual( + finish_reason, + "tool_calls", + "Final response of function calling should have finish_reason 'tool_calls'", + ) + def test_function_calling_streaming_args_parsing(self): """ Test: Whether the function call arguments returned in streaming mode can be correctly concatenated into valid JSON.