From a21ef36352003b1a63753d74ac7f745c2abf072c Mon Sep 17 00:00:00 2001 From: Frankey_8080 <32973306+Frank-Jie@users.noreply.github.com> Date: Sun, 27 Apr 2025 09:59:31 +0800 Subject: [PATCH] support for the DeepSeek model by enabling streaming response parsing (#5592) --- docs/references/deepseek.md | 25 ++++++- python/sglang/srt/function_call_parser.py | 82 +++++++++++++++++++++-- python/sglang/srt/openai_api/adapter.py | 2 - 3 files changed, 100 insertions(+), 9 deletions(-) diff --git a/docs/references/deepseek.md b/docs/references/deepseek.md index f5956bae4..1b6b40edc 100644 --- a/docs/references/deepseek.md +++ b/docs/references/deepseek.md @@ -193,10 +193,31 @@ Expected Response {"id": "62af80528930423a82c806651ec66e7c", "object": "chat.completion", "created": 1744431333, "model": "deepseek-ai/DeepSeek-V3-0324", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "reasoning_content": null, "tool_calls": [{"id": "0", "type": "function", "function": {"name": "query_weather", "arguments": "{\\"city\\": \\"Guangzhou\\"}"}}]}, "logprobs": null, "finish_reason": "tool_calls", "matched_stop": null}], "usage": {"prompt_tokens": 118, "total_tokens": 140, "completion_tokens": 22, "prompt_tokens_details": null}} ``` - +Sample Streaming Request: +``` +curl "http://127.0.0.1:30000/v1/chat/completions" \ +-H "Content-Type: application/json" \ +-d '{"temperature": 0, "max_tokens": 100, "model": "deepseek-ai/DeepSeek-V3-0324","stream":true,"tools": [{"type": "function", "function": {"name": "query_weather", "description": "Get weather of an city, the user should supply a city first", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g. 
Beijing"}}, "required": ["city"]}}}], "messages": [{"role": "user", "content": "How is the weather in Qingdao today"}]}'
+```
+Expected Streamed Chunks (simplified for clarity):
+```
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"{\""}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"city"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\":\""}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"Q"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"ing"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"dao"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\"}"}}]}}]}
+data: {"choices":[{"delta":{"tool_calls":null}}], "finish_reason": "tool_calls"}
+data: [DONE]
+```
+The client needs to concatenate all `arguments` fragments to reconstruct the complete tool call:
+```
+{"city": "Qingdao"}
+```
 Important Notes:
 1. Use a lower `"temperature"` value for better results.
-2. Currently, the function calling implementation for deepseek is incompatible with streaming requests.
+ ## FAQ diff --git a/python/sglang/srt/function_call_parser.py b/python/sglang/srt/function_call_parser.py index 484f39490..abc6cf650 100644 --- a/python/sglang/srt/function_call_parser.py +++ b/python/sglang/srt/function_call_parser.py @@ -491,6 +491,7 @@ class DeepSeekV3Detector(BaseFormatDetector): self.eot_token = "<|tool▁calls▁end|>" self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>" self.func_detail_regex = r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```<|tool▁call▁end|>" + self._last_arguments = "" def has_tool_call(self, text: str) -> bool: """Check if the text contains a deepseek format tool call.""" @@ -528,13 +529,84 @@ class DeepSeekV3Detector(BaseFormatDetector): def structure_info(self) -> _GetInfoFunc: return lambda name: StructureInfo( - begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>" - + name - + "\n```json\n", - end="\n```<|tool▁call▁end|><|tool▁calls▁end|>", - trigger="<|tool▁calls▁begin|>", + begin=">" + name + "\n```json\n", + end="\n```<", + trigger=">" + name + "\n```json\n", ) + def parse_streaming_increment( + self, new_text: str, tools: List[Tool] + ) -> StreamingParseResult: + """ + Streaming incremental parsing tool calls for DeepSeekV3 format. 
+ """ + self._buffer += new_text + current_text = self._buffer + + if self.bot_token not in current_text: + self._buffer = "" + for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]: + if e_token in new_text: + new_text = new_text.replace(e_token, "") + return StreamingParseResult(normal_text=new_text) + + if not hasattr(self, "_tool_indices"): + self._tool_indices = { + tool.function.name: i + for i, tool in enumerate(tools) + if tool.function and tool.function.name + } + + calls: list[ToolCallItem] = [] + try: + partial_match = re.search( + pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)", + string=current_text, + flags=re.DOTALL, + ) + if partial_match: + func_name = partial_match.group(2).strip() + func_args_raw = partial_match.group(3).strip() + + if not self.current_tool_name_sent: + calls.append( + ToolCallItem( + tool_index=self._tool_indices.get(func_name, 0), + name=func_name, + parameters="", + ) + ) + self.current_tool_name_sent = True + else: + argument_diff = ( + func_args_raw[len(self._last_arguments) :] + if func_args_raw.startswith(self._last_arguments) + else func_args_raw + ) + + if argument_diff: + calls.append( + ToolCallItem( + tool_index=self._tool_indices.get(func_name, 0), + name=None, + parameters=argument_diff, + ) + ) + self._last_arguments += argument_diff + + if _is_complete_json(func_args_raw): + result = StreamingParseResult(normal_text="", calls=calls) + self._buffer = "" + self._last_arguments = "" + self.current_tool_name_sent = False + return result + + return StreamingParseResult(normal_text="", calls=calls) + + except Exception as e: + logger.error(f"Error in parse_streaming_increment: {e}") + return StreamingParseResult(normal_text=current_text) + class MultiFormatParser: def __init__(self, detectors: List[BaseFormatDetector]): diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 040548ad0..944a1c121 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ 
b/python/sglang/srt/openai_api/adapter.py @@ -966,8 +966,6 @@ def v1_chat_generate_request( ), } ) - # TODO fix the compatible issues with xgrammar - strict_tag = None for message in request.messages: if isinstance(message.content, str):