Feat: support disabling the tool parser (#10184)
This commit is contained in:
@@ -53,6 +53,7 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
):
|
||||
super().__init__(tokenizer_manager)
|
||||
self.template_manager = template_manager
|
||||
self.tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
|
||||
|
||||
def _request_id_prefix(self) -> str:
|
||||
return "chatcmpl-"
|
||||
@@ -172,10 +173,11 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
]
|
||||
else:
|
||||
tools = [item.function.model_dump() for item in request.tools]
|
||||
|
||||
tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
|
||||
parser = FunctionCallParser(request.tools, tool_call_parser)
|
||||
tool_call_constraint = parser.get_structure_constraint(request.tool_choice)
|
||||
if self.tool_call_parser:
|
||||
parser = FunctionCallParser(request.tools, self.tool_call_parser)
|
||||
tool_call_constraint = parser.get_structure_constraint(
|
||||
request.tool_choice
|
||||
)
|
||||
|
||||
# Use chat template
|
||||
if self.template_manager.chat_template_name is None:
|
||||
@@ -537,7 +539,11 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
yield f"data: {chunk.model_dump_json()}\n\n"
|
||||
|
||||
# Handle tool calls
|
||||
if request.tool_choice != "none" and request.tools:
|
||||
if (
|
||||
request.tool_choice != "none"
|
||||
and request.tools
|
||||
and self.tool_call_parser
|
||||
):
|
||||
async for chunk in self._process_tool_call_stream(
|
||||
index,
|
||||
delta,
|
||||
@@ -727,10 +733,13 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
|
||||
# Handle tool calls
|
||||
tool_calls = None
|
||||
if request.tool_choice != "none" and request.tools:
|
||||
tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
|
||||
if (
|
||||
request.tool_choice != "none"
|
||||
and request.tools
|
||||
and self.tool_call_parser
|
||||
):
|
||||
tool_calls, text, finish_reason = self._process_tool_calls(
|
||||
text, request.tools, tool_call_parser, finish_reason
|
||||
text, request.tools, finish_reason
|
||||
)
|
||||
|
||||
choice_data = ChatCompletionResponseChoice(
|
||||
@@ -824,11 +833,10 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
self,
|
||||
text: str,
|
||||
tools: List[Any],
|
||||
tool_call_parser: Optional[str],
|
||||
finish_reason: Dict[str, Any],
|
||||
) -> tuple[Optional[List[ToolCall]], str, Dict[str, Any]]:
|
||||
"""Process tool calls in the response"""
|
||||
parser = FunctionCallParser(tools, tool_call_parser)
|
||||
parser = FunctionCallParser(tools, self.tool_call_parser)
|
||||
if parser.has_tool_call(text):
|
||||
if finish_reason["type"] == "stop":
|
||||
finish_reason["type"] = "tool_calls"
|
||||
@@ -838,7 +846,10 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
tool_calls = []
|
||||
for call_info in call_info_list:
|
||||
# For Kimi-K2, align tool_call_id with the model format: functions.{name}:{index}
|
||||
if tool_call_parser == "kimi_k2" and call_info.name is not None:
|
||||
if (
|
||||
self.tool_call_parser == "kimi_k2"
|
||||
and call_info.name is not None
|
||||
):
|
||||
tool_id = f"functions.{call_info.name}:{call_info.tool_index}"
|
||||
else:
|
||||
tool_id = f"call_{uuid.uuid4().hex[:24]}"
|
||||
@@ -933,7 +944,7 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
if index not in parser_dict:
|
||||
parser_dict[index] = FunctionCallParser(
|
||||
tools=request.tools,
|
||||
tool_call_parser=self.tokenizer_manager.server_args.tool_call_parser,
|
||||
tool_call_parser=self.tool_call_parser,
|
||||
)
|
||||
parser = parser_dict[index]
|
||||
|
||||
@@ -962,7 +973,7 @@ class OpenAIServingChat(OpenAIServingBase):
|
||||
# Tool call ID should be generated only once per tool call
|
||||
if call_item.name:
|
||||
# First chunk: include ID and function name
|
||||
if self.tokenizer_manager.server_args.tool_call_parser == "kimi_k2":
|
||||
if self.tool_call_parser == "kimi_k2":
|
||||
# Align with Kimi-K2 format: functions.{name}:{index}
|
||||
tool_call_id = f"functions.{call_item.name}:{call_item.tool_index}"
|
||||
else:
|
||||
|
||||
@@ -332,7 +332,7 @@ class ServingChatTestCase(unittest.TestCase):
|
||||
"""Ensure non-streaming tool_call.id matches functions.{name}:{index} for kimi_k2 parser."""
|
||||
|
||||
# Force kimi_k2 parser
|
||||
self.tm.server_args.tool_call_parser = "kimi_k2"
|
||||
self.chat.tool_call_parser = "kimi_k2"
|
||||
|
||||
# Mock FunctionCallParser.parse_non_stream to return one tool call
|
||||
with patch(
|
||||
@@ -357,7 +357,6 @@ class ServingChatTestCase(unittest.TestCase):
|
||||
tool_calls, remaining_text, _ = self.chat._process_tool_calls(
|
||||
text="<|tool_calls_section_begin|>...",
|
||||
tools=tools,
|
||||
tool_call_parser="kimi_k2",
|
||||
finish_reason=finish_reason,
|
||||
)
|
||||
|
||||
@@ -370,7 +369,7 @@ class ServingChatTestCase(unittest.TestCase):
|
||||
"""Ensure streaming first chunk tool_call.id matches functions.{name}:{index} for kimi_k2 parser."""
|
||||
|
||||
# Force kimi_k2 parser
|
||||
self.tm.server_args.tool_call_parser = "kimi_k2"
|
||||
self.chat.tool_call_parser = "kimi_k2"
|
||||
|
||||
# Prepare request with tools
|
||||
req = ChatCompletionRequest(
|
||||
|
||||
Reference in New Issue
Block a user