update
This commit is contained in:
179
vllm/entrypoints/openai/parser/responses_parser.py
Normal file
179
vllm/entrypoints/openai/parser/responses_parser.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
|
||||
from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
|
||||
from openai.types.responses.response_function_tool_call_output_item import (
|
||||
ResponseFunctionToolCallOutputItem,
|
||||
)
|
||||
from openai.types.responses.response_output_item import McpCall
|
||||
from openai.types.responses.response_output_message import ResponseOutputMessage
|
||||
from openai.types.responses.response_output_text import ResponseOutputText
|
||||
from openai.types.responses.response_reasoning_item import (
|
||||
Content,
|
||||
ResponseReasoningItem,
|
||||
)
|
||||
|
||||
from vllm.entrypoints.constants import MCP_PREFIX
|
||||
from vllm.entrypoints.openai.responses.protocol import (
|
||||
ResponseInputOutputItem,
|
||||
ResponsesRequest,
|
||||
)
|
||||
from vllm.outputs import CompletionOutput
|
||||
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
from vllm.tool_parsers.abstract_tool_parser import ToolParser
|
||||
from vllm.utils import random_uuid
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ResponsesParser:
|
||||
"""Incremental parser over completion tokens with reasoning support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
tokenizer: TokenizerLike,
|
||||
reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser],
|
||||
response_messages: list[ResponseInputOutputItem],
|
||||
request: ResponsesRequest,
|
||||
tool_parser_cls: Callable[[TokenizerLike], ToolParser] | None,
|
||||
):
|
||||
self.response_messages: list[ResponseInputOutputItem] = (
|
||||
# TODO: initial messages may not be properly typed
|
||||
response_messages
|
||||
)
|
||||
self.num_init_messages = len(response_messages)
|
||||
self.tokenizer = tokenizer
|
||||
self.request = request
|
||||
|
||||
self.reasoning_parser_instance = reasoning_parser_cls(tokenizer)
|
||||
self.tool_parser_instance = None
|
||||
if tool_parser_cls is not None:
|
||||
self.tool_parser_instance = tool_parser_cls(tokenizer)
|
||||
|
||||
# Store the last finish_reason to determine response status
|
||||
self.finish_reason: str | None = None
|
||||
|
||||
def process(self, output: CompletionOutput) -> "ResponsesParser":
|
||||
# Store the finish_reason from the output
|
||||
self.finish_reason = output.finish_reason
|
||||
|
||||
reasoning_content, content = self.reasoning_parser_instance.extract_reasoning(
|
||||
output.text, request=self.request
|
||||
)
|
||||
if reasoning_content:
|
||||
self.response_messages.append(
|
||||
ResponseReasoningItem(
|
||||
type="reasoning",
|
||||
id=f"rs_{random_uuid()}",
|
||||
summary=[],
|
||||
content=[
|
||||
Content(
|
||||
type="reasoning_text",
|
||||
text=reasoning_content,
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
function_calls: list[ResponseFunctionToolCall] = []
|
||||
if self.tool_parser_instance is not None:
|
||||
tool_call_info = self.tool_parser_instance.extract_tool_calls(
|
||||
content if content is not None else "",
|
||||
request=self.request, # type: ignore
|
||||
)
|
||||
if tool_call_info is not None and tool_call_info.tools_called:
|
||||
# extract_tool_calls() returns a list of tool calls.
|
||||
function_calls.extend(
|
||||
ResponseFunctionToolCall(
|
||||
id=f"fc_{random_uuid()}",
|
||||
call_id=f"call_{random_uuid()}",
|
||||
type="function_call",
|
||||
status="completed",
|
||||
name=tool_call.function.name,
|
||||
arguments=tool_call.function.arguments,
|
||||
)
|
||||
for tool_call in tool_call_info.tool_calls
|
||||
)
|
||||
content = tool_call_info.content
|
||||
if content and content.strip() == "":
|
||||
content = None
|
||||
|
||||
if content:
|
||||
self.response_messages.append(
|
||||
ResponseOutputMessage(
|
||||
type="message",
|
||||
id=f"msg_{random_uuid()}",
|
||||
status="completed",
|
||||
role="assistant",
|
||||
content=[
|
||||
ResponseOutputText(
|
||||
annotations=[], # TODO
|
||||
type="output_text",
|
||||
text=content,
|
||||
logprobs=None, # TODO
|
||||
)
|
||||
],
|
||||
)
|
||||
)
|
||||
if len(function_calls) > 0:
|
||||
self.response_messages.extend(function_calls)
|
||||
|
||||
return self
|
||||
|
||||
def make_response_output_items_from_parsable_context(
|
||||
self,
|
||||
) -> list[ResponseOutputItem]:
|
||||
"""Given a list of sentences, construct ResponseOutput Items."""
|
||||
response_messages = self.response_messages[self.num_init_messages :]
|
||||
output_messages: list[ResponseOutputItem] = []
|
||||
for message in response_messages:
|
||||
if not isinstance(message, ResponseFunctionToolCallOutputItem):
|
||||
output_messages.append(message)
|
||||
else:
|
||||
if len(output_messages) == 0:
|
||||
raise ValueError(
|
||||
"Cannot have a FunctionToolCallOutput before FunctionToolCall."
|
||||
)
|
||||
if isinstance(output_messages[-1], ResponseFunctionToolCall):
|
||||
mcp_message = McpCall(
|
||||
id=f"{MCP_PREFIX}{random_uuid()}",
|
||||
arguments=output_messages[-1].arguments,
|
||||
name=output_messages[-1].name,
|
||||
server_label=output_messages[
|
||||
-1
|
||||
].name, # TODO: store the server label
|
||||
type="mcp_call",
|
||||
status="completed",
|
||||
output=message.output,
|
||||
# TODO: support error output
|
||||
)
|
||||
output_messages[-1] = mcp_message
|
||||
|
||||
return output_messages
|
||||
|
||||
|
||||
def get_responses_parser_for_simple_context(
|
||||
*,
|
||||
tokenizer: TokenizerLike,
|
||||
reasoning_parser_cls: Callable[[TokenizerLike], ReasoningParser],
|
||||
response_messages: list[ResponseInputOutputItem],
|
||||
request: ResponsesRequest,
|
||||
tool_parser_cls,
|
||||
) -> ResponsesParser:
|
||||
"""Factory function to create a ResponsesParser with
|
||||
optional reasoning parser.
|
||||
|
||||
Returns:
|
||||
ResponsesParser instance configured with the provided parser
|
||||
"""
|
||||
return ResponsesParser(
|
||||
tokenizer=tokenizer,
|
||||
reasoning_parser_cls=reasoning_parser_cls,
|
||||
response_messages=response_messages,
|
||||
request=request,
|
||||
tool_parser_cls=tool_parser_cls,
|
||||
)
|
||||
Reference in New Issue
Block a user