add function call parser for DeepSeek V3 (#5224)
This commit is contained in:
@@ -163,6 +163,35 @@ When using FlashInfer MLA wrapper (`--attention-backend flashinfer`) with specul
|
|||||||
|
|
||||||
See [Separate Reasoning](https://docs.sglang.ai/backend/separate_reasoning.html).
|
See [Separate Reasoning](https://docs.sglang.ai/backend/separate_reasoning.html).
|
||||||
|
|
||||||
|
|
||||||
|
### Function calling for DeepSeek Models
|
||||||
|
|
||||||
|
Add the argument `--tool-call-parser deepseekv3` to enable this feature. For example (running on a single H20 node):
|
||||||
|
|
||||||
|
```
|
||||||
|
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3-0324 --tp 8 --port 30000 --host 0.0.0.0 --mem-fraction-static 0.9 --disable-cuda-graph --tool-call-parser deepseekv3
|
||||||
|
```
|
||||||
|
|
||||||
|
Sample Request:
|
||||||
|
|
||||||
|
```
|
||||||
|
curl "http://127.0.0.1:30000/v1/chat/completions" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"temperature": 0, "max_tokens": 100, "model": "deepseek-ai/DeepSeek-V3-0324", "tools": [{"type": "function", "function": {"name": "query_weather", "description": "Get weather of an city, the user should supply a city first", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g. Beijing"}}, "required": ["city"]}}}], "messages": [{"role": "user", "content": "Hows the weather like in Qingdao today"}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response:
|
||||||
|
|
||||||
|
```
|
||||||
|
{"id": "62af80528930423a82c806651ec66e7c", "object": "chat.completion", "created": 1744431333, "model": "deepseek-ai/DeepSeek-V3-0324", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "reasoning_content": null, "tool_calls": [{"id": "0", "type": "function", "function": {"name": "query_weather", "arguments": "{\"city\": \"Qingdao\"}"}}]}, "logprobs": null, "finish_reason": "tool_calls", "matched_stop": null}], "usage": {"prompt_tokens": 118, "total_tokens": 140, "completion_tokens": 22, "prompt_tokens_details": null}}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Important Notes:
|
||||||
|
1. Use a lower `"temperature"` value for better results.
|
||||||
|
2. Currently, the function calling implementation for DeepSeek models does not support streaming requests.
|
||||||
|
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
|
|
||||||
1. **Question**: What should I do if model loading takes too long and NCCL timeout occurs?
|
1. **Question**: What should I do if model loading takes too long and NCCL timeout occurs?
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ TOOLS_TAG_LIST = [
|
|||||||
"<tool_call>",
|
"<tool_call>",
|
||||||
"<|python_tag|>",
|
"<|python_tag|>",
|
||||||
"[TOOL_CALLS]",
|
"[TOOL_CALLS]",
|
||||||
|
"<|tool▁calls▁begin|>",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -477,6 +478,64 @@ class Llama32Detector(BaseFormatDetector):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DeepSeekV3Detector(BaseFormatDetector):
    r"""
    Detector for DeepSeek models.

    Assumes function call format:
      '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>'

    The whole list of calls is delimited by ``bot_token`` / ``eot_token``;
    every individual call sits between a call-begin and a call-end marker and
    carries its arguments as a fenced ``json`` block.
    """

    def __init__(self):
        super().__init__()
        self.bot_token = "<|tool▁calls▁begin|>"
        self.eot_token = "<|tool▁calls▁end|>"

        # The marker tokens contain characters that are regex metacharacters
        # (notably the vertical bar, which means alternation). They must be
        # escaped before being embedded in a pattern, otherwise the pattern
        # degenerates into a set of unrelated alternatives and never matches
        # a complete tool call.
        call_begin = re.escape("<|tool▁call▁begin|>")
        call_end = re.escape("<|tool▁call▁end|>")
        call_sep = re.escape("<|tool▁sep|>")

        # Matches one complete call segment (non-greedy, so that consecutive
        # calls are returned as separate matches).
        self.func_call_regex = call_begin + r".*?" + call_end
        # Groups: (1) call type, (2) function name, (3) JSON argument text.
        self.func_detail_regex = (
            call_begin
            + r"(.*)"
            + call_sep
            + r"(.*)\n```json\n(.*)\n```"
            + call_end
        )

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a deepseek format tool call."""
        return self.bot_token in text

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: A StreamingParseResult. When no begin-of-calls token is
            present, the whole text is returned as normal text with no calls.
            When parsing succeeds, normal text is the stripped prefix before
            the begin-of-calls token and ``calls`` holds the parsed calls.
            When parsing fails, the full original text is returned as normal
            text.
        """
        idx = text.find(self.bot_token)
        if idx == -1:
            # No tool-call section at all: everything is plain text.
            return StreamingParseResult(normal_text=text, calls=[])

        normal_text = text[:idx].strip()
        segments = re.findall(self.func_call_regex, text, re.DOTALL)
        calls = []
        try:
            for segment in segments:
                func_detail = re.search(self.func_detail_regex, segment, re.DOTALL)
                if func_detail is None:
                    # The segment has begin/end markers but not the expected
                    # "<type><sep><name>\n```json\n<args>\n```" layout.
                    raise ValueError(f"malformed tool call segment: {segment!r}")
                func_name = func_detail.group(2)
                func_args = json.loads(func_detail.group(3))
                # construct the dict shape expected by parse_base_json
                call_payload = {"name": func_name, "parameters": func_args}
                calls.extend(self.parse_base_json(call_payload, tools))
            return StreamingParseResult(normal_text=normal_text, calls=calls)
        except Exception as e:
            logger.error(f"Error in detect_and_parse: {e}")
            # return the normal text if parsing fails
            return StreamingParseResult(normal_text=text)

    def structure_info(self) -> _GetInfoFunc:
        """Return a factory that builds the StructureInfo for a function name.

        The produced ``begin`` / ``end`` strings wrap a single call in the
        same marker layout that ``detect_and_parse`` expects; ``trigger`` is
        the begin-of-calls token.
        """
        return lambda name: StructureInfo(
            begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
            + name
            + "\n```json\n",
            end="\n```<|tool▁call▁end|><|tool▁calls▁end|>",
            trigger="<|tool▁calls▁begin|>",
        )
|
||||||
|
|
||||||
|
|
||||||
class MultiFormatParser:
|
class MultiFormatParser:
|
||||||
def __init__(self, detectors: List[BaseFormatDetector]):
|
def __init__(self, detectors: List[BaseFormatDetector]):
|
||||||
"""
|
"""
|
||||||
@@ -543,6 +602,7 @@ class FunctionCallParser:
|
|||||||
"llama3": Llama32Detector,
|
"llama3": Llama32Detector,
|
||||||
"qwen25": Qwen25Detector,
|
"qwen25": Qwen25Detector,
|
||||||
"mistral": MistralDetector,
|
"mistral": MistralDetector,
|
||||||
|
"deepseekv3": DeepSeekV3Detector,
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, tools: List[Tool], tool_call_parser: str):
|
def __init__(self, tools: List[Tool], tool_call_parser: str):
|
||||||
|
|||||||
@@ -938,6 +938,35 @@ def v1_chat_generate_request(
|
|||||||
|
|
||||||
if chat_template_name is None:
|
if chat_template_name is None:
|
||||||
openai_compatible_messages = []
|
openai_compatible_messages = []
|
||||||
|
if (
|
||||||
|
tools
|
||||||
|
and tokenizer_manager.server_args.tool_call_parser == "deepseekv3"
|
||||||
|
):
|
||||||
|
# add function call prompt to deepseekv3
|
||||||
|
openai_compatible_messages.append(
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": """You are a helpful Assistant.
|
||||||
|
## Tools
|
||||||
|
### Function
|
||||||
|
You have the following functions available:
|
||||||
|
"""
|
||||||
|
+ "".join(
|
||||||
|
[
|
||||||
|
f"""
|
||||||
|
- `{tool['name']}`:
|
||||||
|
```json
|
||||||
|
{json.dumps(tool)}
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
for tool in tools
|
||||||
|
]
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# TODO fix the compatible issues with xgrammar
|
||||||
|
strict_tag = None
|
||||||
|
|
||||||
for message in request.messages:
|
for message in request.messages:
|
||||||
if isinstance(message.content, str):
|
if isinstance(message.content, str):
|
||||||
openai_compatible_messages.append(
|
openai_compatible_messages.append(
|
||||||
|
|||||||
@@ -1087,7 +1087,7 @@ class ServerArgs:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--tool-call-parser",
|
"--tool-call-parser",
|
||||||
type=str,
|
type=str,
|
||||||
choices=["qwen25", "mistral", "llama3"],
|
choices=["qwen25", "mistral", "llama3", "deepseekv3"],
|
||||||
default=ServerArgs.tool_call_parser,
|
default=ServerArgs.tool_call_parser,
|
||||||
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
|
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user