enginex-bi_150-vllm/vllm/parser/abstract_parser.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json
from abc import abstractmethod
from collections.abc import Sequence
from functools import cached_property

from openai.types.responses import (
    ResponseFunctionToolCall,
    ResponseOutputItem,
    ResponseOutputMessage,
    ResponseOutputText,
    ResponseReasoningItem,
    ToolChoiceFunction,
)
from openai.types.responses.response_output_text import Logprob
from openai.types.responses.response_reasoning_item import (
    Content as ResponseReasoningTextContent,
)
from pydantic import TypeAdapter

from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
    ChatCompletionNamedToolChoiceParam,
    ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
    DeltaMessage,
    ExtractedToolCallInformation,
    FunctionCall,
    FunctionDefinition,
)
from vllm.entrypoints.openai.responses.protocol import (
    ResponsesRequest,
)
from vllm.logger import init_logger
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.utils import random_uuid

logger = init_logger(__name__)


class Parser:
    """
    Abstract Parser class that unifies ReasoningParser and ToolParser into
    a single interface for parsing model output.

    This class provides a unified way to handle both reasoning extraction
    (e.g., chain-of-thought content in <think> tags) and tool call extraction
    (e.g., function calls in XML/JSON format) from model outputs.

    Subclasses can either:
    1. Override the abstract methods directly for custom parsing logic
    2. Set `reasoning_parser` and `tool_parser` properties to delegate to
       existing parser implementations

    Class Attributes:
        reasoning_parser_cls: The ReasoningParser class to use (for compatibility
            with code that needs the class, not instance).
        tool_parser_cls: The ToolParser class to use (for compatibility with
            code that needs the class, not instance).
    """

    # Class-level parser classes for compatibility with existing patterns
    # Subclasses should override these if they use specific parser classes
    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike, *args, **kwargs):
        """
        Initialize the Parser.

        Args:
            tokenizer: The tokenizer used by the model. This is required for
                token-based parsing operations.
        """
        self.model_tokenizer = tokenizer
        self._reasoning_parser: ReasoningParser | None = None
        self._tool_parser: ToolParser | None = None

    @cached_property
    def vocab(self) -> dict[str, int]:
        """Get the vocabulary mapping from tokens to IDs."""
        return self.model_tokenizer.get_vocab()

    @property
    def reasoning_parser(self) -> ReasoningParser | None:
        """The underlying reasoning parser, if any."""
        return self._reasoning_parser

    @reasoning_parser.setter
    def reasoning_parser(self, parser: ReasoningParser | None) -> None:
        self._reasoning_parser = parser

    @property
    def tool_parser(self) -> ToolParser | None:
        """The underlying tool parser, if any."""
        return self._tool_parser

    @tool_parser.setter
    def tool_parser(self, parser: ToolParser | None) -> None:
        self._tool_parser = parser

    # ========== Reasoning Parser Methods ==========

    @abstractmethod
    def is_reasoning_end(self, input_ids: list[int]) -> bool:
        """
        Check if the reasoning content ends in the input_ids.

        Used by structured engines like `xgrammar` to check if the
        reasoning content ends in the model output.

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            True if the reasoning content ends in the input_ids.
        """

    def is_reasoning_end_streaming(
        self, input_ids: list[int], delta_ids: list[int]
    ) -> bool:
        """
        Check if the reasoning content ends during a decode step.

        Args:
            input_ids: The entire model output token IDs.
            delta_ids: The last few computed tokens at the current decode step.

        Returns:
            True if the reasoning content ends in the delta_ids.
        """
        return self.is_reasoning_end(input_ids)

    @abstractmethod
    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
        """
        Extract content token IDs from the input_ids.

        This extracts the non-reasoning content (e.g., everything after
        the </think> tag).

        Args:
            input_ids: The token IDs of the model output.

        Returns:
            The extracted content token IDs.
        """

    @abstractmethod
    def extract_response_outputs(
        self,
        model_output: str,
        request: ResponsesRequest,
        enable_auto_tools: bool = False,
        tool_call_id_type: str = "random",
        logprobs: list[Logprob] | None = None,
    ) -> list[ResponseOutputItem]:
        """
        Extract reasoning, content, and tool calls from a complete
        model-generated string and return as ResponseOutputItem objects.

        Used for non-streaming responses where we have the entire model
        response available before sending to the client.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.
            enable_auto_tools: Whether to enable automatic tool call parsing.
            tool_call_id_type: Type of tool call ID generation ("random", etc).
            logprobs: Pre-computed logprobs for the output text, if any.

        Returns:
            A list of ResponseOutputItem objects.
        """

    @abstractmethod
    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        """
        Extract reasoning content from a complete model-generated string.

        Used for non-streaming responses where we have the entire model
        response available before sending to the client.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.

        Returns:
            A tuple of (reasoning_content, response_content).
        """

    @abstractmethod
    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        """
        Extract reasoning content from a streaming delta message.

        Args:
            previous_text: Text from all previous tokens.
            current_text: Text including the current delta.
            delta_text: The new text in this delta.
            previous_token_ids: Token IDs from previous generation.
            current_token_ids: All token IDs including current.
            delta_token_ids: The new token IDs in this delta.

        Returns:
            A DeltaMessage with reasoning and/or content fields, or None.
        """

    # ========== Tool Parser Methods ==========

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """
        Adjust the request parameters for tool calling.

        Can be overridden by subclasses to modify request parameters
        (e.g., setting structured output schemas for tool calling).

        Args:
            request: The original request.

        Returns:
            The adjusted request.
        """
        return request

    @abstractmethod
    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        """
        Extract tool calls from a complete model-generated string.

        Used for non-streaming responses.

        Args:
            model_output: The complete model-generated string.
            request: The request object used to generate the output.

        Returns:
            ExtractedToolCallInformation containing the tool calls.
        """

    @abstractmethod
    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """
        Extract tool calls from a streaming delta message.

        Args:
            previous_text: Text from all previous tokens.
            current_text: Text including the current delta.
            delta_text: The new text in this delta.
            previous_token_ids: Token IDs from previous generation.
            current_token_ids: All token IDs including current.
            delta_token_ids: The new token IDs in this delta.
            request: The request object.

        Returns:
            A DeltaMessage with tool_calls field, or None.
        """


class DelegatingParser(Parser):
    """
    A Parser implementation that delegates to separate ReasoningParser and
    ToolParser instances.

    This is the recommended base class for creating model-specific parsers
    that combine existing reasoning and tool parser implementations.
    Subclasses should set `self._reasoning_parser` and `self._tool_parser`
    in their `__init__` method.

    If either parser is None, the corresponding methods will return default
    values (no reasoning extraction, no tool calls).
    """

    def extract_reasoning(
        self,
        model_output: str,
        request: ChatCompletionRequest | ResponsesRequest,
    ) -> tuple[str | None, str | None]:
        if self._reasoning_parser is None:
            return None, model_output
        return self._reasoning_parser.extract_reasoning(model_output, request)

    def extract_response_outputs(
        self,
        model_output: str,
        request: ResponsesRequest,
        enable_auto_tools: bool = False,
        tool_call_id_type: str = "random",
        logprobs: list[Logprob] | None = None,
    ) -> list[ResponseOutputItem]:
        # First extract reasoning
        reasoning, content = self.extract_reasoning(model_output, request)

        # Then parse tool calls from the content
        tool_calls, content = self._parse_tool_calls(
            request=request,
            content=content,
            enable_auto_tools=enable_auto_tools,
        )

        # Build output items
        outputs: list[ResponseOutputItem] = []

        # Add reasoning item if present
        if reasoning:
            reasoning_item = ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[
                    ResponseReasoningTextContent(text=reasoning, type="reasoning_text")
                ],
                status=None,  # NOTE: Only the last output item has status.
            )
            outputs.append(reasoning_item)

        # Add message item if there's content
        if content:
            res_text_part = ResponseOutputText(
                text=content,
                annotations=[],
                type="output_text",
                logprobs=logprobs,
            )
            message_item = ResponseOutputMessage(
                id=f"msg_{random_uuid()}",
                content=[res_text_part],
                role="assistant",
                status="completed",
                type="message",
            )
            outputs.append(message_item)

        if tool_calls:
            # We use a simple counter for history_tool_call_count because
            # we don't track the history of tool calls in the Responses API yet.
            # This means that the tool call index will start from 0 for each
            # request.
            for history_tool_call_cnt, tool_call in enumerate(tool_calls):
                tool_call_item = ResponseFunctionToolCall(
                    id=f"fc_{random_uuid()}",
                    call_id=tool_call.id
                    if tool_call.id
                    else make_tool_call_id(
                        id_type=tool_call_id_type,
                        func_name=tool_call.name,
                        idx=history_tool_call_cnt,
                    ),
                    type="function_call",
                    status="completed",
                    name=tool_call.name,
                    arguments=tool_call.arguments,
                )
                outputs.append(tool_call_item)

        return outputs

    def _parse_tool_calls(
        self,
        request: ResponsesRequest,
        content: str | None,
        enable_auto_tools: bool,
    ) -> tuple[list[FunctionCall], str | None]:
        """
        TODO(qandrew): merge _parse_tool_calls_from_content
        for ChatCompletions into this function
        Parse tool calls from content based on request tool_choice settings.

        Returns:
            A tuple of (function_calls, remaining_content) if tool calls
            were parsed
        """
        function_calls: list[FunctionCall] = []

        if request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction):
            # Forced Function Call (Responses API style)
            assert content is not None
            function_calls.append(
                FunctionCall(name=request.tool_choice.name, arguments=content)
            )
            return function_calls, None  # Clear content since tool is called.

        if request.tool_choice and isinstance(
            request.tool_choice, ChatCompletionNamedToolChoiceParam
        ):
            # Forced Function Call (Chat Completion API style)
            assert content is not None
            function_calls.append(
                FunctionCall(name=request.tool_choice.function.name, arguments=content)
            )
            return function_calls, None  # Clear content since tool is called.

        if request.tool_choice == "required":
            # Required tool calls - parse JSON
            assert content is not None
            tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
            function_calls.extend(
                FunctionCall(
                    name=tool_call.name,
                    arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
                )
                for tool_call in tool_calls
            )
            return function_calls, None  # Clear content since tool is called.

        if (
            self._tool_parser is not None
            and enable_auto_tools
            and (request.tool_choice == "auto" or request.tool_choice is None)
        ):
            # Automatic Tool Call Parsing
            tool_call_info = self._tool_parser.extract_tool_calls(
                content if content is not None else "",
                request=request,  # type: ignore
            )
            if tool_call_info is not None and tool_call_info.tools_called:
                function_calls.extend(
                    FunctionCall(
                        id=tool_call.id,
                        name=tool_call.function.name,
                        arguments=tool_call.function.arguments,
                    )
                    for tool_call in tool_call_info.tool_calls
                )
                remaining_content = tool_call_info.content
                if remaining_content and remaining_content.strip() == "":
                    remaining_content = None
                return function_calls, remaining_content

        # No tool calls
        return [], content

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        if self._reasoning_parser is None:
            return DeltaMessage(content=delta_text)
        return self._reasoning_parser.extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
    ) -> ExtractedToolCallInformation:
        if self._tool_parser is None:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )
        return self._tool_parser.extract_tool_calls(model_output, request)

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        if self._tool_parser is None:
            return None
        return self._tool_parser.extract_tool_calls_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
            request,
        )


class _WrappedParser(DelegatingParser):
    """
    A DelegatingParser subclass that instantiates parsers from class attributes.

    This class is used to dynamically create a parser that wraps individual
    ReasoningParser and ToolParser classes. The class attributes
    `reasoning_parser_cls` and `tool_parser_cls` should be set before
    instantiation.

    Usage:
        _WrappedParser.reasoning_parser_cls = MyReasoningParser
        _WrappedParser.tool_parser_cls = MyToolParser
        parser = _WrappedParser(tokenizer)
    """

    reasoning_parser_cls: type[ReasoningParser] | None = None
    tool_parser_cls: type[ToolParser] | None = None

    def __init__(self, tokenizer: TokenizerLike):
        super().__init__(tokenizer)
        # Instantiate the underlying parsers from class attributes
        if self.__class__.reasoning_parser_cls is not None:
            self._reasoning_parser = self.__class__.reasoning_parser_cls(tokenizer)
        if self.__class__.tool_parser_cls is not None:
            self._tool_parser = self.__class__.tool_parser_cls(tokenizer)
update 2026-04-09 11:23:47 +08:00			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`

			`import json`
			`from abc import abstractmethod`
			`from collections.abc import Sequence`
			`from functools import cached_property`

			`from openai.types.responses import (`
			`ResponseFunctionToolCall,`
			`ResponseOutputItem,`
			`ResponseOutputMessage,`
			`ResponseOutputText,`
			`ResponseReasoningItem,`
			`ToolChoiceFunction,`
			`)`
			`from openai.types.responses.response_output_text import Logprob`
			`from openai.types.responses.response_reasoning_item import (`
			`Content as ResponseReasoningTextContent,`
			`)`
			`from pydantic import TypeAdapter`

			`from vllm.entrypoints.chat_utils import make_tool_call_id`
			`from vllm.entrypoints.openai.chat_completion.protocol import (`
			`ChatCompletionNamedToolChoiceParam,`
			`ChatCompletionRequest,`
			`)`
			`from vllm.entrypoints.openai.engine.protocol import (`
			`DeltaMessage,`
			`ExtractedToolCallInformation,`
			`FunctionCall,`
			`FunctionDefinition,`
			`)`
			`from vllm.entrypoints.openai.responses.protocol import (`
			`ResponsesRequest,`
			`)`
			`from vllm.logger import init_logger`
			`from vllm.reasoning.abs_reasoning_parsers import ReasoningParser`
			`from vllm.tokenizers import TokenizerLike`
			`from vllm.tool_parsers.abstract_tool_parser import ToolParser`
			`from vllm.utils import random_uuid`

			`logger = init_logger(__name__)`


			`class Parser:`
			`"""`
			`Abstract Parser class that unifies ReasoningParser and ToolParser into`
			`a single interface for parsing model output.`

			`This class provides a unified way to handle both reasoning extraction`
			`(e.g., chain-of-thought content in <think> tags) and tool call extraction`
			`(e.g., function calls in XML/JSON format) from model outputs.`

			`Subclasses can either:`
			`1. Override the abstract methods directly for custom parsing logic`
			2. Set `reasoning_parser` and `tool_parser` properties to delegate to
			`existing parser implementations`

			`Class Attributes:`
			`reasoning_parser_cls: The ReasoningParser class to use (for compatibility`
			`with code that needs the class, not instance).`
			`tool_parser_cls: The ToolParser class to use (for compatibility with`
			`code that needs the class, not instance).`
			`"""`

			`# Class-level parser classes for compatibility with existing patterns`
			`# Subclasses should override these if they use specific parser classes`
			`reasoning_parser_cls: type[ReasoningParser] \| None = None`
			`tool_parser_cls: type[ToolParser] \| None = None`

			`def __init__(self, tokenizer: TokenizerLike, args, *kwargs):`
			`"""`
			`Initialize the Parser.`

			`Args:`
			`tokenizer: The tokenizer used by the model. This is required for`
			`token-based parsing operations.`
			`"""`
			`self.model_tokenizer = tokenizer`
			`self._reasoning_parser: ReasoningParser \| None = None`
			`self._tool_parser: ToolParser \| None = None`

			`@cached_property`
			`def vocab(self) -> dict[str, int]:`
			`"""Get the vocabulary mapping from tokens to IDs."""`
			`return self.model_tokenizer.get_vocab()`

			`@property`
			`def reasoning_parser(self) -> ReasoningParser \| None:`
			`"""The underlying reasoning parser, if any."""`
			`return self._reasoning_parser`

			`@reasoning_parser.setter`
			`def reasoning_parser(self, parser: ReasoningParser \| None) -> None:`
			`self._reasoning_parser = parser`

			`@property`
			`def tool_parser(self) -> ToolParser \| None:`
			`"""The underlying tool parser, if any."""`
			`return self._tool_parser`

			`@tool_parser.setter`
			`def tool_parser(self, parser: ToolParser \| None) -> None:`
			`self._tool_parser = parser`

			`# ========== Reasoning Parser Methods ==========`

			`@abstractmethod`
			`def is_reasoning_end(self, input_ids: list[int]) -> bool:`
			`"""`
			`Check if the reasoning content ends in the input_ids.`

			Used by structured engines like `xgrammar` to check if the
			`reasoning content ends in the model output.`

			`Args:`
			`input_ids: The token IDs of the model output.`

			`Returns:`
			`True if the reasoning content ends in the input_ids.`
			`"""`

			`def is_reasoning_end_streaming(`
			`self, input_ids: list[int], delta_ids: list[int]`
			`) -> bool:`
			`"""`
			`Check if the reasoning content ends during a decode step.`

			`Args:`
			`input_ids: The entire model output token IDs.`
			`delta_ids: The last few computed tokens at the current decode step.`

			`Returns:`
			`True if the reasoning content ends in the delta_ids.`
			`"""`
			`return self.is_reasoning_end(input_ids)`

			`@abstractmethod`
			`def extract_content_ids(self, input_ids: list[int]) -> list[int]:`
			`"""`
			`Extract content token IDs from the input_ids.`

			`This extracts the non-reasoning content (e.g., everything after`
			`the </think> tag).`

			`Args:`
			`input_ids: The token IDs of the model output.`

			`Returns:`
			`The extracted content token IDs.`
			`"""`

			`@abstractmethod`
			`def extract_response_outputs(`
			`self,`
			`model_output: str,`
			`request: ResponsesRequest,`
			`enable_auto_tools: bool = False,`
			`tool_call_id_type: str = "random",`
			`logprobs: list[Logprob] \| None = None,`
			`) -> list[ResponseOutputItem]:`
			`"""`
			`Extract reasoning, content, and tool calls from a complete`
			`model-generated string and return as ResponseOutputItem objects.`

			`Used for non-streaming responses where we have the entire model`
			`response available before sending to the client.`

			`Args:`
			`model_output: The complete model-generated string.`
			`request: The request object used to generate the output.`
			`enable_auto_tools: Whether to enable automatic tool call parsing.`
			`tool_call_id_type: Type of tool call ID generation ("random", etc).`
			`logprobs: Pre-computed logprobs for the output text, if any.`

			`Returns:`
			`A list of ResponseOutputItem objects.`
			`"""`

			`@abstractmethod`
			`def extract_reasoning(`
			`self,`
			`model_output: str,`
			`request: ChatCompletionRequest \| ResponsesRequest,`
			`) -> tuple[str \| None, str \| None]:`
			`"""`
			`Extract reasoning content from a complete model-generated string.`

			`Used for non-streaming responses where we have the entire model`
			`response available before sending to the client.`

			`Args:`
			`model_output: The complete model-generated string.`
			`request: The request object used to generate the output.`

			`Returns:`
			`A tuple of (reasoning_content, response_content).`
			`"""`

			`@abstractmethod`
			`def extract_reasoning_streaming(`
			`self,`
			`previous_text: str,`
			`current_text: str,`
			`delta_text: str,`
			`previous_token_ids: Sequence[int],`
			`current_token_ids: Sequence[int],`
			`delta_token_ids: Sequence[int],`
			`) -> DeltaMessage \| None:`
			`"""`
			`Extract reasoning content from a streaming delta message.`

			`Args:`
			`previous_text: Text from all previous tokens.`
			`current_text: Text including the current delta.`
			`delta_text: The new text in this delta.`
			`previous_token_ids: Token IDs from previous generation.`
			`current_token_ids: All token IDs including current.`
			`delta_token_ids: The new token IDs in this delta.`

			`Returns:`
			`A DeltaMessage with reasoning and/or content fields, or None.`
			`"""`

			`# ========== Tool Parser Methods ==========`

			`def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:`
			`"""`
			`Adjust the request parameters for tool calling.`

			`Can be overridden by subclasses to modify request parameters`
			`(e.g., setting structured output schemas for tool calling).`

			`Args:`
			`request: The original request.`

			`Returns:`
			`The adjusted request.`
			`"""`
			`return request`

			`@abstractmethod`
			`def extract_tool_calls(`
			`self,`
			`model_output: str,`
			`request: ChatCompletionRequest,`
			`) -> ExtractedToolCallInformation:`
			`"""`
			`Extract tool calls from a complete model-generated string.`

			`Used for non-streaming responses.`

			`Args:`
			`model_output: The complete model-generated string.`
			`request: The request object used to generate the output.`

			`Returns:`
			`ExtractedToolCallInformation containing the tool calls.`
			`"""`

			`@abstractmethod`
			`def extract_tool_calls_streaming(`
			`self,`
			`previous_text: str,`
			`current_text: str,`
			`delta_text: str,`
			`previous_token_ids: Sequence[int],`
			`current_token_ids: Sequence[int],`
			`delta_token_ids: Sequence[int],`
			`request: ChatCompletionRequest,`
			`) -> DeltaMessage \| None:`
			`"""`
			`Extract tool calls from a streaming delta message.`

			`Args:`
			`previous_text: Text from all previous tokens.`
			`current_text: Text including the current delta.`
			`delta_text: The new text in this delta.`
			`previous_token_ids: Token IDs from previous generation.`
			`current_token_ids: All token IDs including current.`
			`delta_token_ids: The new token IDs in this delta.`
			`request: The request object.`

			`Returns:`
			`A DeltaMessage with tool_calls field, or None.`
			`"""`


			`class DelegatingParser(Parser):`
			`"""`
			`A Parser implementation that delegates to separate ReasoningParser and`
			`ToolParser instances.`

			`This is the recommended base class for creating model-specific parsers`
			`that combine existing reasoning and tool parser implementations.`
			Subclasses should set `self._reasoning_parser` and `self._tool_parser`
			in their `__init__` method.

			`If either parser is None, the corresponding methods will return default`
			`values (no reasoning extraction, no tool calls).`
			`"""`

			`def extract_reasoning(`
			`self,`
			`model_output: str,`
			`request: ChatCompletionRequest \| ResponsesRequest,`
			`) -> tuple[str \| None, str \| None]:`
			`if self._reasoning_parser is None:`
			`return None, model_output`
			`return self._reasoning_parser.extract_reasoning(model_output, request)`

			`def extract_response_outputs(`
			`self,`
			`model_output: str,`
			`request: ResponsesRequest,`
			`enable_auto_tools: bool = False,`
			`tool_call_id_type: str = "random",`
			`logprobs: list[Logprob] \| None = None,`
			`) -> list[ResponseOutputItem]:`
			`# First extract reasoning`
			`reasoning, content = self.extract_reasoning(model_output, request)`

			`# Then parse tool calls from the content`
			`tool_calls, content = self._parse_tool_calls(`
			`request=request,`
			`content=content,`
			`enable_auto_tools=enable_auto_tools,`
			`)`

			`# Build output items`
			`outputs: list[ResponseOutputItem] = []`

			`# Add reasoning item if present`
			`if reasoning:`
			`reasoning_item = ResponseReasoningItem(`
			`id=f"rs_{random_uuid()}",`
			`summary=[],`
			`type="reasoning",`
			`content=[`
			`ResponseReasoningTextContent(text=reasoning, type="reasoning_text")`
			`],`
			`status=None, # NOTE: Only the last output item has status.`
			`)`
			`outputs.append(reasoning_item)`

			`# Add message item if there's content`
			`if content:`
			`res_text_part = ResponseOutputText(`
			`text=content,`
			`annotations=[],`
			`type="output_text",`
			`logprobs=logprobs,`
			`)`
			`message_item = ResponseOutputMessage(`
			`id=f"msg_{random_uuid()}",`
			`content=[res_text_part],`
			`role="assistant",`
			`status="completed",`
			`type="message",`
			`)`
			`outputs.append(message_item)`

			`if tool_calls:`
			`# We use a simple counter for history_tool_call_count because`
			`# we don't track the history of tool calls in the Responses API yet.`
			`# This means that the tool call index will start from 0 for each`
			`# request.`
			`for history_tool_call_cnt, tool_call in enumerate(tool_calls):`
			`tool_call_item = ResponseFunctionToolCall(`
			`id=f"fc_{random_uuid()}",`
			`call_id=tool_call.id`
			`if tool_call.id`
			`else make_tool_call_id(`
			`id_type=tool_call_id_type,`
			`func_name=tool_call.name,`
			`idx=history_tool_call_cnt,`
			`),`
			`type="function_call",`
			`status="completed",`
			`name=tool_call.name,`
			`arguments=tool_call.arguments,`
			`)`
			`outputs.append(tool_call_item)`

			`return outputs`

			`def _parse_tool_calls(`
			`self,`
			`request: ResponsesRequest,`
			`content: str \| None,`
			`enable_auto_tools: bool,`
			`) -> tuple[list[FunctionCall], str \| None]:`
			`"""`
			`TODO(qandrew): merge _parse_tool_calls_from_content`
			`for ChatCompletions into this function`
			`Parse tool calls from content based on request tool_choice settings.`

			`Returns:`
			`A tuple of (function_calls, remaining_content) if tool calls`
			`were parsed`
			`"""`
			`function_calls: list[FunctionCall] = []`

			`if request.tool_choice and isinstance(request.tool_choice, ToolChoiceFunction):`
			`# Forced Function Call (Responses API style)`
			`assert content is not None`
			`function_calls.append(`
			`FunctionCall(name=request.tool_choice.name, arguments=content)`
			`)`
			`return function_calls, None # Clear content since tool is called.`

			`if request.tool_choice and isinstance(`
			`request.tool_choice, ChatCompletionNamedToolChoiceParam`
			`):`
			`# Forced Function Call (Chat Completion API style)`
			`assert content is not None`
			`function_calls.append(`
			`FunctionCall(name=request.tool_choice.function.name, arguments=content)`
			`)`
			`return function_calls, None # Clear content since tool is called.`

			`if request.tool_choice == "required":`
			`# Required tool calls - parse JSON`
			`assert content is not None`
			`tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)`
			`function_calls.extend(`
			`FunctionCall(`
			`name=tool_call.name,`
			`arguments=json.dumps(tool_call.parameters, ensure_ascii=False),`
			`)`
			`for tool_call in tool_calls`
			`)`
			`return function_calls, None # Clear content since tool is called.`

			`if (`
			`self._tool_parser is not None`
			`and enable_auto_tools`
			`and (request.tool_choice == "auto" or request.tool_choice is None)`
			`):`
			`# Automatic Tool Call Parsing`
			`tool_call_info = self._tool_parser.extract_tool_calls(`
			`content if content is not None else "",`
			`request=request, # type: ignore`
			`)`
			`if tool_call_info is not None and tool_call_info.tools_called:`
			`function_calls.extend(`
			`FunctionCall(`
			`id=tool_call.id,`
			`name=tool_call.function.name,`
			`arguments=tool_call.function.arguments,`
			`)`
			`for tool_call in tool_call_info.tool_calls`
			`)`
			`remaining_content = tool_call_info.content`
			`if remaining_content and remaining_content.strip() == "":`
			`remaining_content = None`
			`return function_calls, remaining_content`

			`# No tool calls`
			`return [], content`

			`def extract_reasoning_streaming(`
			`self,`
			`previous_text: str,`
			`current_text: str,`
			`delta_text: str,`
			`previous_token_ids: Sequence[int],`
			`current_token_ids: Sequence[int],`
			`delta_token_ids: Sequence[int],`
			`) -> DeltaMessage \| None:`
			`if self._reasoning_parser is None:`
			`return DeltaMessage(content=delta_text)`
			`return self._reasoning_parser.extract_reasoning_streaming(`
			`previous_text,`
			`current_text,`
			`delta_text,`
			`previous_token_ids,`
			`current_token_ids,`
			`delta_token_ids,`
			`)`

			`def extract_tool_calls(`
			`self,`
			`model_output: str,`
			`request: ChatCompletionRequest,`
			`) -> ExtractedToolCallInformation:`
			`if self._tool_parser is None:`
			`return ExtractedToolCallInformation(`
			`tools_called=False, tool_calls=[], content=model_output`
			`)`
			`return self._tool_parser.extract_tool_calls(model_output, request)`

			`def extract_tool_calls_streaming(`
			`self,`
			`previous_text: str,`
			`current_text: str,`
			`delta_text: str,`
			`previous_token_ids: Sequence[int],`
			`current_token_ids: Sequence[int],`
			`delta_token_ids: Sequence[int],`
			`request: ChatCompletionRequest,`
			`) -> DeltaMessage \| None:`
			`if self._tool_parser is None:`
			`return None`
			`return self._tool_parser.extract_tool_calls_streaming(`
			`previous_text,`
			`current_text,`
			`delta_text,`
			`previous_token_ids,`
			`current_token_ids,`
			`delta_token_ids,`
			`request,`
			`)`


			`class _WrappedParser(DelegatingParser):`
			`"""`
			`A DelegatingParser subclass that instantiates parsers from class attributes.`

			`This class is used to dynamically create a parser that wraps individual`
			`ReasoningParser and ToolParser classes. The class attributes`
			`reasoning_parser_cls` and `tool_parser_cls` should be set before
			`instantiation.`

			`Usage:`
			`_WrappedParser.reasoning_parser_cls = MyReasoningParser`
			`_WrappedParser.tool_parser_cls = MyToolParser`
			`parser = _WrappedParser(tokenizer)`
			`"""`

			`reasoning_parser_cls: type[ReasoningParser] \| None = None`
			`tool_parser_cls: type[ToolParser] \| None = None`

			`def __init__(self, tokenizer: TokenizerLike):`
			`super().__init__(tokenizer)`
			`# Instantiate the underlying parsers from class attributes`
			`if self.__class__.reasoning_parser_cls is not None:`
			`self._reasoning_parser = self.__class__.reasoning_parser_cls(tokenizer)`
			`if self.__class__.tool_parser_cls is not None:`
			`self._tool_parser = self.__class__.tool_parser_cls(tokenizer)`