Feat: support disabling the tool call parser (#10184)

2025-09-10 14:03:55 -07:00
parent 2286e85e77
commit f3b5db6ee8
2 changed files with 26 additions and 16 deletions
--- a/python/sglang/srt/entrypoints/openai/serving_chat.py
+++ b/python/sglang/srt/entrypoints/openai/serving_chat.py
@@ -53,6 +53,7 @@ class OpenAIServingChat(OpenAIServingBase):
    ):
        super().__init__(tokenizer_manager)
        self.template_manager = template_manager
+        self.tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser

    def _request_id_prefix(self) -> str:
        return "chatcmpl-"
@@ -172,10 +173,11 @@ class OpenAIServingChat(OpenAIServingBase):
                ]
            else:
                tools = [item.function.model_dump() for item in request.tools]
-
-            tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
-            parser = FunctionCallParser(request.tools, tool_call_parser)
-            tool_call_constraint = parser.get_structure_constraint(request.tool_choice)
+            if self.tool_call_parser:
+                parser = FunctionCallParser(request.tools, self.tool_call_parser)
+                tool_call_constraint = parser.get_structure_constraint(
+                    request.tool_choice
+                )

        # Use chat template
        if self.template_manager.chat_template_name is None:
@@ -537,7 +539,11 @@ class OpenAIServingChat(OpenAIServingBase):
                        yield f"data: {chunk.model_dump_json()}\n\n"

                # Handle tool calls
-                if request.tool_choice != "none" and request.tools:
+                if (
+                    request.tool_choice != "none"
+                    and request.tools
+                    and self.tool_call_parser
+                ):
                    async for chunk in self._process_tool_call_stream(
                        index,
                        delta,
@@ -727,10 +733,13 @@ class OpenAIServingChat(OpenAIServingBase):

            # Handle tool calls
            tool_calls = None
-            if request.tool_choice != "none" and request.tools:
-                tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
+            if (
+                request.tool_choice != "none"
+                and request.tools
+                and self.tool_call_parser
+            ):
                tool_calls, text, finish_reason = self._process_tool_calls(
-                    text, request.tools, tool_call_parser, finish_reason
+                    text, request.tools, finish_reason
                )

            choice_data = ChatCompletionResponseChoice(
@@ -824,11 +833,10 @@ class OpenAIServingChat(OpenAIServingBase):
        self,
        text: str,
        tools: List[Any],
-        tool_call_parser: Optional[str],
        finish_reason: Dict[str, Any],
    ) -> tuple[Optional[List[ToolCall]], str, Dict[str, Any]]:
        """Process tool calls in the response"""
-        parser = FunctionCallParser(tools, tool_call_parser)
+        parser = FunctionCallParser(tools, self.tool_call_parser)
        if parser.has_tool_call(text):
            if finish_reason["type"] == "stop":
                finish_reason["type"] = "tool_calls"
@@ -838,7 +846,10 @@ class OpenAIServingChat(OpenAIServingBase):
                tool_calls = []
                for call_info in call_info_list:
                    # For Kimi-K2, align tool_call_id with the model format: functions.{name}:{index}
-                    if tool_call_parser == "kimi_k2" and call_info.name is not None:
+                    if (
+                        self.tool_call_parser == "kimi_k2"
+                        and call_info.name is not None
+                    ):
                        tool_id = f"functions.{call_info.name}:{call_info.tool_index}"
                    else:
                        tool_id = f"call_{uuid.uuid4().hex[:24]}"
@@ -933,7 +944,7 @@ class OpenAIServingChat(OpenAIServingBase):
        if index not in parser_dict:
            parser_dict[index] = FunctionCallParser(
                tools=request.tools,
-                tool_call_parser=self.tokenizer_manager.server_args.tool_call_parser,
+                tool_call_parser=self.tool_call_parser,
            )
        parser = parser_dict[index]

@@ -962,7 +973,7 @@ class OpenAIServingChat(OpenAIServingBase):
            # Tool call ID should be generated only once per tool call
            if call_item.name:
                # First chunk: include ID and function name
-                if self.tokenizer_manager.server_args.tool_call_parser == "kimi_k2":
+                if self.tool_call_parser == "kimi_k2":
                    # Align with Kimi-K2 format: functions.{name}:{index}
                    tool_call_id = f"functions.{call_item.name}:{call_item.tool_index}"
                else:
--- a/test/srt/openai_server/basic/test_serving_chat.py
+++ b/test/srt/openai_server/basic/test_serving_chat.py
@@ -332,7 +332,7 @@ class ServingChatTestCase(unittest.TestCase):
        """Ensure non-streaming tool_call.id matches functions.{name}:{index} for kimi_k2 parser."""

        # Force kimi_k2 parser
-        self.tm.server_args.tool_call_parser = "kimi_k2"
+        self.chat.tool_call_parser = "kimi_k2"

        # Mock FunctionCallParser.parse_non_stream to return one tool call
        with patch(
@@ -357,7 +357,6 @@ class ServingChatTestCase(unittest.TestCase):
            tool_calls, remaining_text, _ = self.chat._process_tool_calls(
                text="<|tool_calls_section_begin|>...",
                tools=tools,
-                tool_call_parser="kimi_k2",
                finish_reason=finish_reason,
            )

@@ -370,7 +369,7 @@ class ServingChatTestCase(unittest.TestCase):
        """Ensure streaming first chunk tool_call.id matches functions.{name}:{index} for kimi_k2 parser."""

        # Force kimi_k2 parser
-        self.tm.server_args.tool_call_parser = "kimi_k2"
+        self.chat.tool_call_parser = "kimi_k2"

        # Prepare request with tools
        req = ChatCompletionRequest(