diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 5158febdc..31af46bb9 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -1479,7 +1479,7 @@ async def v1_chat_completions( reasoning_parser_dict = {} async def generate_stream_resp(): - tool_call_first = True + tool_index_previous = -1 is_firsts = {} stream_buffers = {} n_prev_tokens = {} @@ -1645,6 +1645,7 @@ async def v1_chat_completions( # 2) if we found calls, we output them as separate chunk(s) for call_item in calls: + tool_index_current = call_item.tool_index # transform call_item -> FunctionResponse + ToolCall if finish_reason_type == "stop": latest_delta_len = 0 @@ -1671,7 +1672,7 @@ async def v1_chat_completions( tool_call = ToolCall( id=( f"call_{base64.urlsafe_b64encode(uuid.uuid4().bytes).rstrip(b'=').decode()}" - if tool_call_first + if tool_index_previous != tool_index_current else None ), index=call_item.tool_index, @@ -1680,7 +1681,7 @@ async def v1_chat_completions( arguments=call_item.parameters, ), ) - tool_call_first = False + tool_index_previous = tool_index_current choice_data = ChatCompletionResponseStreamChoice( index=index, delta=DeltaMessage(tool_calls=[tool_call]),