# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Helpers for converting between Responses-API items and Chat-Completions
messages.

These utilities translate OpenAI Responses API input/output items
(function tool calls, reasoning items, output messages, MCP calls) to and
from the Chat Completions message format that the underlying chat engine
consumes.
"""
from typing import Any

from openai.types.chat import (
    ChatCompletionAssistantMessageParam,
    ChatCompletionMessageToolCallParam,
    ChatCompletionToolMessageParam,
)
from openai.types.chat.chat_completion_message_tool_call_param import (
    Function as FunctionCallTool,
)
from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
from openai.types.responses.response import ToolChoice
from openai.types.responses.response_function_tool_call_output_item import (
    ResponseFunctionToolCallOutputItem,
)
from openai.types.responses.response_output_item import McpCall
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_reasoning_item import ResponseReasoningItem
from openai.types.responses.tool import Tool

from vllm import envs
from vllm.entrypoints.constants import MCP_PREFIX
from vllm.entrypoints.openai.protocol import (
    ChatCompletionMessageParam,
    ResponseInputOutputItem,
)
from vllm.utils import random_uuid


def make_response_output_items_from_parsable_context(
    response_messages: list[ResponseInputOutputItem],
) -> list[ResponseOutputItem]:
    """Given a list of sentences, construct ResponseOutput Items.

    A ``ResponseFunctionToolCallOutputItem`` is folded into the immediately
    preceding ``ResponseFunctionToolCall`` to form a single completed
    ``McpCall`` output item; every other item is passed through unchanged.

    Raises:
        ValueError: If a tool-call output appears before any other item,
            i.e. there is no preceding tool call to attach it to.
    """
    output_messages: list[ResponseOutputItem] = []
    for message in response_messages:
        if not isinstance(message, ResponseFunctionToolCallOutputItem):
            output_messages.append(message)
        else:
            if len(output_messages) == 0:
                raise ValueError(
                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
                )
            if isinstance(output_messages[-1], ResponseFunctionToolCall):
                # Merge the call and its output into one McpCall item,
                # replacing the bare tool call in place.
                mcp_message = McpCall(
                    id=f"{MCP_PREFIX}{random_uuid()}",
                    arguments=output_messages[-1].arguments,
                    name=output_messages[-1].name,
                    server_label=output_messages[
                        -1
                    ].name,  # TODO: store the server label
                    type=f"{MCP_PREFIX}call",
                    status="completed",
                    output=message.output,  # TODO: support error output
                )
                output_messages[-1] = mcp_message
    return output_messages


def construct_input_messages(
    *,
    request_instructions: str | None = None,
    request_input: str | list[ResponseInputOutputItem],
    prev_msg: list[ChatCompletionMessageParam] | None = None,
    prev_response_output: list[ResponseOutputItem] | None = None,
):
    """Build the chat-message list for a new request.

    The result is assembled in order: an optional system message from
    ``request_instructions``, the prior conversation (``prev_msg`` and/or
    the text content of ``prev_response_output``), and finally the new
    ``request_input`` (a plain user string or a list of response items).
    """
    messages: list[ChatCompletionMessageParam] = []
    if request_instructions:
        messages.append(
            {
                "role": "system",
                "content": request_instructions,
            }
        )

    # Prepend the conversation history.
    if prev_msg is not None:
        # Add the previous messages.
        messages.extend(prev_msg)
    if prev_response_output is not None:
        # Add the previous output.
        for output_item in prev_response_output:
            # NOTE: We skip the reasoning output.
            if isinstance(output_item, ResponseOutputMessage):
                for content in output_item.content:
                    messages.append(
                        {
                            "role": "assistant",
                            "content": content.text,
                        }
                    )

    # Append the new input.
    # Responses API supports simple text inputs without chat format.
    if isinstance(request_input, str):
        messages.append({"role": "user", "content": request_input})
    else:
        input_messages = construct_chat_messages_with_tool_call(request_input)
        messages.extend(input_messages)
    return messages


def _maybe_combine_reasoning_and_tool_call(
    item: ResponseInputOutputItem, messages: list[ChatCompletionMessageParam]
) -> ChatCompletionMessageParam | None:
    """Many models treat MCP calls and reasoning as a single message.

    This function checks if the last message is a reasoning message and the
    current message is a tool call"""
    # NOTE: ResponseFunctionToolCall.id is optional in the openai types, so
    # guard against None before the prefix check (a missing id cannot be an
    # MCP call).
    if not (
        isinstance(item, ResponseFunctionToolCall)
        and item.id is not None
        and item.id.startswith(MCP_PREFIX)
    ):
        return None
    if len(messages) == 0:
        return None
    last_message = messages[-1]
    if not (
        last_message.get("role") == "assistant"
        and last_message.get("reasoning") is not None
    ):
        return None
    # Attach the tool call to the preceding reasoning message so the pair is
    # presented to the model as one assistant turn.
    last_message["tool_calls"] = [
        ChatCompletionMessageToolCallParam(
            id=item.call_id,
            function=FunctionCallTool(
                name=item.name,
                arguments=item.arguments,
            ),
            type="function",
        )
    ]
    return last_message


def construct_chat_messages_with_tool_call(
    input_messages: list[ResponseInputOutputItem],
) -> list[ChatCompletionMessageParam]:
    """This function wraps _construct_single_message_from_response_item

    Because some chatMessages come from multiple response items
    for example a reasoning item and a MCP tool call are two response items
    but are one chat message
    """
    messages: list[ChatCompletionMessageParam] = []
    for item in input_messages:
        maybe_combined_message = _maybe_combine_reasoning_and_tool_call(item, messages)
        if maybe_combined_message is not None:
            messages[-1] = maybe_combined_message
        else:
            messages.append(_construct_single_message_from_response_item(item))
    return messages


def _construct_single_message_from_response_item(
    item: ResponseInputOutputItem,
) -> ChatCompletionMessageParam:
    """Convert one Responses-API item into one Chat-Completions message.

    Unrecognized items are returned unchanged (assumed to already be valid
    chat messages).
    """
    if isinstance(item, ResponseFunctionToolCall):
        # Append the function call as a tool call.
        return ChatCompletionAssistantMessageParam(
            role="assistant",
            tool_calls=[
                ChatCompletionMessageToolCallParam(
                    id=item.call_id,
                    function=FunctionCallTool(
                        name=item.name,
                        arguments=item.arguments,
                    ),
                    type="function",
                )
            ],
        )
    elif isinstance(item, ResponseReasoningItem):
        reasoning_content = ""
        if item.encrypted_content:
            raise ValueError("Encrypted content is not supported.")
        # Prefer the single summary text; fall back to the single content
        # text when no summary is present.
        if len(item.summary) == 1:
            reasoning_content = item.summary[0].text
        elif item.content and len(item.content) == 1:
            reasoning_content = item.content[0].text
        return {
            "role": "assistant",
            "reasoning": reasoning_content,
        }
    elif isinstance(item, ResponseOutputMessage):
        return {
            "role": "assistant",
            "content": item.content[0].text,
        }
    elif isinstance(item, ResponseFunctionToolCallOutputItem):
        return ChatCompletionToolMessageParam(
            role="tool",
            content=item.output,
            tool_call_id=item.call_id,
        )
    elif isinstance(item, dict) and item.get("type") == "function_call_output":
        # Append the function call output as a tool message.
        return ChatCompletionToolMessageParam(
            role="tool",
            content=item.get("output"),
            tool_call_id=item.get("call_id"),
        )
    return item  # type: ignore


def extract_tool_types(tools: list[Tool]) -> set[str]:
    """
    Extracts the tool types from the given tools.
    """
    tool_types: set[str] = set()
    for tool in tools:
        if tool.type == "mcp":
            # Allow the MCP Tool type to enable built in tools if the
            # server_label is allowlisted in
            # envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS
            if tool.server_label in envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS:
                tool_types.add(tool.server_label)
        else:
            tool_types.add(tool.type)
    return tool_types


def convert_tool_responses_to_completions_format(tool: dict) -> dict:
    """
    Convert a flat tool schema:
        {"type": "function", "name": "...", "description": "...",
         "parameters": {...}}
    into:
        {"type": "function", "function": {...}}
    """
    return {
        "type": "function",
        "function": tool,
    }


def construct_tool_dicts(
    tools: list[Tool], tool_choice: ToolChoice
) -> list[dict[str, Any]] | None:
    """Serialize Responses-API tools into Chat-Completions tool dicts.

    Returns ``None`` when there are no tools or tool use is disabled via
    ``tool_choice == "none"``.
    """
    if tools is None or (tool_choice == "none"):
        tool_dicts = None
    else:
        tool_dicts = [
            convert_tool_responses_to_completions_format(tool.model_dump())
            for tool in tools
        ]
    return tool_dicts