Sync from v0.13
This commit is contained in:
102
vllm/tool_parsers/openai_tool_parser.py
Normal file
102
vllm/tool_parsers/openai_tool_parser.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import json
|
||||
from collections.abc import Sequence
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
|
||||
from vllm.entrypoints.openai.protocol import (
|
||||
ChatCompletionRequest,
|
||||
DeltaMessage,
|
||||
ExtractedToolCallInformation,
|
||||
FunctionCall,
|
||||
ToolCall,
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.tool_parsers.abstract_tool_parser import (
|
||||
ToolParser,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
else:
|
||||
TokenizerLike = object
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class OpenAIToolParser(ToolParser):
    """Tool parser for gpt-oss (Harmony-format) model output.

    Tool calls are recovered from the generated *token IDs* via
    ``parse_output_into_messages`` rather than from decoded text, so
    :meth:`extract_tool_calls` requires ``token_ids`` and the streaming
    variant is intentionally unimplemented (streaming is handled manually
    in serving_chat.py).
    """

    def __init__(self, tokenizer: "TokenizerLike"):
        super().__init__(tokenizer)

    def extract_tool_calls(
        self,
        model_output: str,
        request: ChatCompletionRequest,
        token_ids: Sequence[int] | None = None,
    ) -> ExtractedToolCallInformation:
        """Parse Harmony messages from ``token_ids`` and collect tool calls.

        Args:
            model_output: Decoded model text. Unused; parsing is token-based.
            request: Originating chat-completion request. Unused here but
                part of the ``ToolParser`` interface.
            token_ids: Generated token IDs to parse. Required.

        Returns:
            ``ExtractedToolCallInformation`` holding any function calls found
            and the user-visible content (final channel preferred over
            commentary).

        Raises:
            NotImplementedError: If ``token_ids`` is not provided.
        """
        if token_ids is None:
            raise NotImplementedError(
                "OpenAIToolParser requires token IDs and does not support text-based extraction."  # noqa: E501
            )

        parser = parse_output_into_messages(token_ids)
        tool_calls: list[ToolCall] = []
        final_content = None
        commentary_content = None

        for msg in parser.messages:
            # Skip messages with no content parts.
            if len(msg.content) < 1:
                continue
            msg_text = msg.content[0].text
            if msg.recipient and msg.recipient.startswith("functions."):
                # If no content-type is given assume JSON, as that's the
                # most common case with gpt-oss models.
                if not msg.content_type or "json" in msg.content_type:
                    # Load and dump the JSON text to check validity and
                    # remove any extra newlines or other odd formatting.
                    try:
                        tool_args = json.dumps(json.loads(msg_text))
                    except json.JSONDecodeError:
                        logger.exception(
                            "Error decoding JSON tool call from response."
                        )
                        tool_args = msg_text
                else:
                    tool_args = msg_text
                tool_calls.append(
                    ToolCall(
                        type="function",
                        function=FunctionCall(
                            # removeprefix strips only the leading
                            # "functions." marker; split("functions.")[1]
                            # would truncate a function name that itself
                            # contains the substring "functions.".
                            name=msg.recipient.removeprefix("functions."),
                            arguments=tool_args,
                        ),
                    )
                )
            elif msg.channel == "final":
                final_content = msg_text
            elif msg.channel == "commentary" and not msg.recipient:
                commentary_content = msg_text

        return ExtractedToolCallInformation(
            tools_called=len(tool_calls) > 0,
            tool_calls=tool_calls,
            # Prefer final content over commentary content if both are
            # present; commentary content is tool call preambles meant to
            # be shown to the user.
            content=final_content or commentary_content,
        )

    def extract_tool_calls_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
        request: ChatCompletionRequest,
    ) -> DeltaMessage | None:
        """Unsupported: streaming extraction is done manually in
        serving_chat.py, not through this parser.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Not being used, manual parsing in serving_chat.py"  # noqa: E501
        )
|
||||
Reference in New Issue
Block a user