Sync from v0.13
This commit is contained in:
249
vllm/entrypoints/responses_utils.py
Normal file
249
vllm/entrypoints/responses_utils.py
Normal file
@@ -0,0 +1,249 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from typing import Any
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionAssistantMessageParam,
|
||||
ChatCompletionMessageToolCallParam,
|
||||
ChatCompletionToolMessageParam,
|
||||
)
|
||||
from openai.types.chat.chat_completion_message_tool_call_param import (
|
||||
Function as FunctionCallTool,
|
||||
)
|
||||
from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
|
||||
from openai.types.responses.response import ToolChoice
|
||||
from openai.types.responses.response_function_tool_call_output_item import (
|
||||
ResponseFunctionToolCallOutputItem,
|
||||
)
|
||||
from openai.types.responses.response_output_item import McpCall
|
||||
from openai.types.responses.response_output_message import ResponseOutputMessage
|
||||
from openai.types.responses.response_reasoning_item import ResponseReasoningItem
|
||||
from openai.types.responses.tool import Tool
|
||||
|
||||
from vllm import envs
|
||||
from vllm.entrypoints.constants import MCP_PREFIX
|
||||
from vllm.entrypoints.openai.protocol import (
|
||||
ChatCompletionMessageParam,
|
||||
ResponseInputOutputItem,
|
||||
)
|
||||
from vllm.utils import random_uuid
|
||||
|
||||
|
||||
def make_response_output_items_from_parsable_context(
|
||||
response_messages: list[ResponseInputOutputItem],
|
||||
) -> list[ResponseOutputItem]:
|
||||
"""Given a list of sentences, construct ResponseOutput Items."""
|
||||
output_messages: list[ResponseOutputItem] = []
|
||||
for message in response_messages:
|
||||
if not isinstance(message, ResponseFunctionToolCallOutputItem):
|
||||
output_messages.append(message)
|
||||
else:
|
||||
if len(output_messages) == 0:
|
||||
raise ValueError(
|
||||
"Cannot have a FunctionToolCallOutput before FunctionToolCall."
|
||||
)
|
||||
if isinstance(output_messages[-1], ResponseFunctionToolCall):
|
||||
mcp_message = McpCall(
|
||||
id=f"{MCP_PREFIX}{random_uuid()}",
|
||||
arguments=output_messages[-1].arguments,
|
||||
name=output_messages[-1].name,
|
||||
server_label=output_messages[
|
||||
-1
|
||||
].name, # TODO: store the server label
|
||||
type=f"{MCP_PREFIX}call",
|
||||
status="completed",
|
||||
output=message.output,
|
||||
# TODO: support error output
|
||||
)
|
||||
output_messages[-1] = mcp_message
|
||||
|
||||
return output_messages
|
||||
|
||||
|
||||
def construct_input_messages(
|
||||
*,
|
||||
request_instructions: str | None = None,
|
||||
request_input: str | list[ResponseInputOutputItem],
|
||||
prev_msg: list[ChatCompletionMessageParam] | None = None,
|
||||
prev_response_output: list[ResponseOutputItem] | None = None,
|
||||
):
|
||||
messages: list[ChatCompletionMessageParam] = []
|
||||
if request_instructions:
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": request_instructions,
|
||||
}
|
||||
)
|
||||
|
||||
# Prepend the conversation history.
|
||||
if prev_msg is not None:
|
||||
# Add the previous messages.
|
||||
messages.extend(prev_msg)
|
||||
if prev_response_output is not None:
|
||||
# Add the previous output.
|
||||
for output_item in prev_response_output:
|
||||
# NOTE: We skip the reasoning output.
|
||||
if isinstance(output_item, ResponseOutputMessage):
|
||||
for content in output_item.content:
|
||||
messages.append(
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": content.text,
|
||||
}
|
||||
)
|
||||
|
||||
# Append the new input.
|
||||
# Responses API supports simple text inputs without chat format.
|
||||
if isinstance(request_input, str):
|
||||
messages.append({"role": "user", "content": request_input})
|
||||
else:
|
||||
input_messages = construct_chat_messages_with_tool_call(request_input)
|
||||
messages.extend(input_messages)
|
||||
return messages
|
||||
|
||||
|
||||
def _maybe_combine_reasoning_and_tool_call(
    item: ResponseInputOutputItem, messages: list[ChatCompletionMessageParam]
) -> ChatCompletionMessageParam | None:
    """Fold an MCP tool call into a preceding reasoning message.

    Many models treat MCP calls and reasoning as a single assistant message.
    If ``item`` is an MCP function tool call and the last entry of
    ``messages`` is an assistant message carrying reasoning, attach the tool
    call to that message and return it; otherwise return None so the caller
    appends a fresh message.
    """
    # ``ResponseFunctionToolCall.id`` is optional; guard against None
    # before checking the MCP prefix to avoid an AttributeError.
    if not (
        isinstance(item, ResponseFunctionToolCall)
        and item.id is not None
        and item.id.startswith(MCP_PREFIX)
    ):
        return None
    if not messages:
        return None
    last_message = messages[-1]
    if not (
        last_message.get("role") == "assistant"
        and last_message.get("reasoning") is not None
    ):
        return None

    # Attach the call to the reasoning message in place.
    last_message["tool_calls"] = [
        ChatCompletionMessageToolCallParam(
            id=item.call_id,
            function=FunctionCallTool(
                name=item.name,
                arguments=item.arguments,
            ),
            type="function",
        )
    ]
    return last_message
|
||||
|
||||
|
||||
def construct_chat_messages_with_tool_call(
|
||||
input_messages: list[ResponseInputOutputItem],
|
||||
) -> list[ChatCompletionMessageParam]:
|
||||
"""This function wraps _construct_single_message_from_response_item
|
||||
Because some chatMessages come from multiple response items
|
||||
for example a reasoning item and a MCP tool call are two response items
|
||||
but are one chat message
|
||||
"""
|
||||
messages: list[ChatCompletionMessageParam] = []
|
||||
for item in input_messages:
|
||||
maybe_combined_message = _maybe_combine_reasoning_and_tool_call(item, messages)
|
||||
if maybe_combined_message is not None:
|
||||
messages[-1] = maybe_combined_message
|
||||
else:
|
||||
messages.append(_construct_single_message_from_response_item(item))
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
def _construct_single_message_from_response_item(
    item: ResponseInputOutputItem,
) -> ChatCompletionMessageParam:
    """Translate one Responses API item into a chat-completion message.

    Items that are already chat messages fall through and are returned
    unchanged.
    """
    if isinstance(item, ResponseFunctionToolCall):
        # A function call becomes an assistant message with one tool call.
        tool_call = ChatCompletionMessageToolCallParam(
            id=item.call_id,
            function=FunctionCallTool(
                name=item.name,
                arguments=item.arguments,
            ),
            type="function",
        )
        return ChatCompletionAssistantMessageParam(
            role="assistant",
            tool_calls=[tool_call],
        )
    if isinstance(item, ResponseReasoningItem):
        if item.encrypted_content:
            raise ValueError("Encrypted content is not supported.")
        # Prefer the single summary entry; fall back to a single content
        # entry; otherwise the reasoning text is empty.
        reasoning = ""
        if len(item.summary) == 1:
            reasoning = item.summary[0].text
        elif item.content and len(item.content) == 1:
            reasoning = item.content[0].text
        return {
            "role": "assistant",
            "reasoning": reasoning,
        }
    if isinstance(item, ResponseOutputMessage):
        return {
            "role": "assistant",
            "content": item.content[0].text,
        }
    if isinstance(item, ResponseFunctionToolCallOutputItem):
        # The function call output becomes a tool message.
        return ChatCompletionToolMessageParam(
            role="tool",
            content=item.output,
            tool_call_id=item.call_id,
        )
    if isinstance(item, dict) and item.get("type") == "function_call_output":
        # A raw function-call-output dict also becomes a tool message.
        return ChatCompletionToolMessageParam(
            role="tool",
            content=item.get("output"),
            tool_call_id=item.get("call_id"),
        )
    return item  # type: ignore
|
||||
|
||||
|
||||
def extract_tool_types(tools: list[Tool]) -> set[str]:
|
||||
"""
|
||||
Extracts the tool types from the given tools.
|
||||
"""
|
||||
tool_types: set[str] = set()
|
||||
for tool in tools:
|
||||
if tool.type == "mcp":
|
||||
# Allow the MCP Tool type to enable built in tools if the
|
||||
# server_label is allowlisted in
|
||||
# envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS
|
||||
if tool.server_label in envs.VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS:
|
||||
tool_types.add(tool.server_label)
|
||||
else:
|
||||
tool_types.add(tool.type)
|
||||
return tool_types
|
||||
|
||||
|
||||
def convert_tool_responses_to_completions_format(tool: dict) -> dict:
    """Wrap a flat Responses-style tool schema for the Completions API.

    Turns
    ``{"type": "function", "name": "...", "description": "...", "parameters": {...}}``
    into
    ``{"type": "function", "function": {...}}``.
    """
    wrapped: dict = {"type": "function"}
    wrapped["function"] = tool
    return wrapped
|
||||
|
||||
|
||||
def construct_tool_dicts(
|
||||
tools: list[Tool], tool_choice: ToolChoice
|
||||
) -> list[dict[str, Any]] | None:
|
||||
if tools is None or (tool_choice == "none"):
|
||||
tool_dicts = None
|
||||
else:
|
||||
tool_dicts = [
|
||||
convert_tool_responses_to_completions_format(tool.model_dump())
|
||||
for tool in tools
|
||||
]
|
||||
return tool_dicts
|
||||
Reference in New Issue
Block a user