add function call parser for DeepSeek V3 (#5224)

This commit is contained in:
Yi Zhou
2025-04-21 08:38:08 +08:00
committed by GitHub
parent 8b39274e34
commit fac17acf08
4 changed files with 119 additions and 1 deletions

View File

@@ -25,6 +25,7 @@ TOOLS_TAG_LIST = [
"<tool_call>",
"<|python_tag|>",
"[TOOL_CALLS]",
"<tool▁calls▁begin>",
]
@@ -477,6 +478,64 @@ class Llama32Detector(BaseFormatDetector):
)
class DeepSeekV3Detector(BaseFormatDetector):
"""
Detector for DeepSeek models.
Assumes function call format:
'<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>get_current_weather\n```json\n{"location": "Tokyo"}\n```<tool▁call▁end>\n<tool▁call▁begin>function<tool▁sep>get_current_weather\n```json\n{"location": "Paris"}\n```<tool▁call▁end><tool▁calls▁end><end▁of▁sentence>
"""
def __init__(self):
super().__init__()
self.bot_token = "<tool▁calls▁begin>"
self.eot_token = "<tool▁calls▁end>"
self.func_call_regex = r"<tool▁call▁begin>.*?<tool▁call▁end>"
self.func_detail_regex = r"<tool▁call▁begin>(.*)<tool▁sep>(.*)\n```json\n(.*)\n```<tool▁call▁end>"
def has_tool_call(self, text: str) -> bool:
"""Check if the text contains a deepseek format tool call."""
return self.bot_token in text
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
"""
One-time parsing: Detects and parses tool calls in the provided text.
:param text: The complete text to parse.
:param tools: List of available tools.
:return: ParseResult indicating success or failure, consumed text, leftover text, and parsed calls.
"""
idx = text.find(self.bot_token)
normal_text = text[:idx].strip() if idx != -1 else text
if self.bot_token not in text:
return StreamingParseResult(normal_text=normal_text, calls=[])
match_result_list = re.findall(self.func_call_regex, text, re.DOTALL)
calls = []
try:
for match_result in match_result_list:
# Get function name
func_detail = re.search(self.func_detail_regex, match_result, re.DOTALL)
func_name = func_detail.group(2)
func_args = func_detail.group(3)
func_args = json.loads(func_args)
# construct match_result for parse_base_json
match_result = {"name": func_name, "parameters": func_args}
calls.extend(self.parse_base_json(match_result, tools))
return StreamingParseResult(normal_text=normal_text, calls=calls)
except Exception as e:
logger.error(f"Error in detect_and_parse: {e}")
# return the normal text if parsing fails
return StreamingParseResult(normal_text=text)
def structure_info(self) -> _GetInfoFunc:
return lambda name: StructureInfo(
begin="<tool▁calls▁begin><tool▁call▁begin>function<tool▁sep>"
+ name
+ "\n```json\n",
end="\n```<tool▁call▁end><tool▁calls▁end>",
trigger="<tool▁calls▁begin>",
)
class MultiFormatParser:
def __init__(self, detectors: List[BaseFormatDetector]):
"""
@@ -543,6 +602,7 @@ class FunctionCallParser:
"llama3": Llama32Detector,
"qwen25": Qwen25Detector,
"mistral": MistralDetector,
"deepseekv3": DeepSeekV3Detector,
}
def __init__(self, tools: List[Tool], tool_call_parser: str):

View File

@@ -938,6 +938,35 @@ def v1_chat_generate_request(
if chat_template_name is None:
openai_compatible_messages = []
if (
tools
and tokenizer_manager.server_args.tool_call_parser == "deepseekv3"
):
# add function call prompt to deepseekv3
openai_compatible_messages.append(
{
"role": "system",
"content": """You are a helpful Assistant.
## Tools
### Function
You have the following functions available:
"""
+ "".join(
[
f"""
- `{tool['name']}`:
```json
{json.dumps(tool)}
```
"""
for tool in tools
]
),
}
)
# TODO fix the compatible issues with xgrammar
strict_tag = None
for message in request.messages:
if isinstance(message.content, str):
openai_compatible_messages.append(

View File

@@ -1087,7 +1087,7 @@ class ServerArgs:
parser.add_argument(
"--tool-call-parser",
type=str,
choices=["qwen25", "mistral", "llama3"],
choices=["qwen25", "mistral", "llama3", "deepseekv3"],
default=ServerArgs.tool_call_parser,
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
)