From f3b5db6ee8b1ba011ded06648a31912e6b82edff Mon Sep 17 00:00:00 2001 From: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com> Date: Wed, 10 Sep 2025 14:03:55 -0700 Subject: [PATCH] Feat: support disable tool parser (#10184) --- .../srt/entrypoints/openai/serving_chat.py | 37 ++++++++++++------- .../openai_server/basic/test_serving_chat.py | 5 +-- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index 690604922..215c61c36 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -53,6 +53,7 @@ class OpenAIServingChat(OpenAIServingBase): ): super().__init__(tokenizer_manager) self.template_manager = template_manager + self.tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser def _request_id_prefix(self) -> str: return "chatcmpl-" @@ -172,10 +173,11 @@ class OpenAIServingChat(OpenAIServingBase): ] else: tools = [item.function.model_dump() for item in request.tools] - - tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser - parser = FunctionCallParser(request.tools, tool_call_parser) - tool_call_constraint = parser.get_structure_constraint(request.tool_choice) + if self.tool_call_parser: + parser = FunctionCallParser(request.tools, self.tool_call_parser) + tool_call_constraint = parser.get_structure_constraint( + request.tool_choice + ) # Use chat template if self.template_manager.chat_template_name is None: @@ -537,7 +539,11 @@ class OpenAIServingChat(OpenAIServingBase): yield f"data: {chunk.model_dump_json()}\n\n" # Handle tool calls - if request.tool_choice != "none" and request.tools: + if ( + request.tool_choice != "none" + and request.tools + and self.tool_call_parser + ): async for chunk in self._process_tool_call_stream( index, delta, @@ -727,10 +733,13 @@ class OpenAIServingChat(OpenAIServingBase): # Handle tool calls tool_calls = None - if request.tool_choice != "none" and request.tools: - tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser + if ( + request.tool_choice != "none" + and request.tools + and self.tool_call_parser + ): tool_calls, text, finish_reason = self._process_tool_calls( - text, request.tools, tool_call_parser, finish_reason + text, request.tools, finish_reason ) choice_data = ChatCompletionResponseChoice( @@ -824,11 +833,10 @@ class OpenAIServingChat(OpenAIServingBase): self, text: str, tools: List[Any], - tool_call_parser: Optional[str], finish_reason: Dict[str, Any], ) -> tuple[Optional[List[ToolCall]], str, Dict[str, Any]]: """Process tool calls in the response""" - parser = FunctionCallParser(tools, tool_call_parser) + parser = FunctionCallParser(tools, self.tool_call_parser) if parser.has_tool_call(text): if finish_reason["type"] == "stop": finish_reason["type"] = "tool_calls" @@ -838,7 +846,10 @@ class OpenAIServingChat(OpenAIServingBase): tool_calls = [] for call_info in call_info_list: # For Kimi-K2, align tool_call_id with the model format: functions.{name}:{index} - if tool_call_parser == "kimi_k2" and call_info.name is not None: + if ( + self.tool_call_parser == "kimi_k2" + and call_info.name is not None + ): tool_id = f"functions.{call_info.name}:{call_info.tool_index}" else: tool_id = f"call_{uuid.uuid4().hex[:24]}" @@ -933,7 +944,7 @@ class OpenAIServingChat(OpenAIServingBase): if index not in parser_dict: parser_dict[index] = FunctionCallParser( tools=request.tools, - tool_call_parser=self.tokenizer_manager.server_args.tool_call_parser, + tool_call_parser=self.tool_call_parser, ) parser = parser_dict[index] @@ -962,7 +973,7 @@ class OpenAIServingChat(OpenAIServingBase): # Tool call ID should be generated only once per tool call if call_item.name: # First chunk: include ID and function name - if self.tokenizer_manager.server_args.tool_call_parser == "kimi_k2": + if self.tool_call_parser == "kimi_k2": # Align with Kimi-K2 format: functions.{name}:{index} tool_call_id = f"functions.{call_item.name}:{call_item.tool_index}" else: diff --git a/test/srt/openai_server/basic/test_serving_chat.py b/test/srt/openai_server/basic/test_serving_chat.py index 41eaea2ee..9f0d48004 100644 --- a/test/srt/openai_server/basic/test_serving_chat.py +++ b/test/srt/openai_server/basic/test_serving_chat.py @@ -332,7 +332,7 @@ class ServingChatTestCase(unittest.TestCase): """Ensure non-streaming tool_call.id matches functions.{name}:{index} for kimi_k2 parser.""" # Force kimi_k2 parser - self.tm.server_args.tool_call_parser = "kimi_k2" + self.chat.tool_call_parser = "kimi_k2" # Mock FunctionCallParser.parse_non_stream to return one tool call with patch( @@ -357,7 +357,6 @@ class ServingChatTestCase(unittest.TestCase): tool_calls, remaining_text, _ = self.chat._process_tool_calls( text="<|tool_calls_section_begin|>...", tools=tools, - tool_call_parser="kimi_k2", finish_reason=finish_reason, ) @@ -370,7 +369,7 @@ class ServingChatTestCase(unittest.TestCase): """Ensure streaming first chunk tool_call.id matches functions.{name}:{index} for kimi_k2 parser.""" # Force kimi_k2 parser - self.tm.server_args.tool_call_parser = "kimi_k2" + self.chat.tool_call_parser = "kimi_k2" # Prepare request with tools req = ChatCompletionRequest(