From 6fa6f38ed33e78993ced2b89ed65aebaaaa9aebb Mon Sep 17 00:00:00 2001 From: mlmz <54172054+minleminzui@users.noreply.github.com> Date: Mon, 28 Apr 2025 22:07:45 +0800 Subject: [PATCH] =?UTF-8?q?Feat:=20add=20support=20for=20thinking=20mode?= =?UTF-8?q?=20via=20chat=5Ftemplate=5Fkwargs.enable=5Ft=E2=80=A6=20(#5551)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: shuaills Co-authored-by: Chayenne Co-authored-by: Lianmin Zheng Co-authored-by: Yineng Zhang --- python/sglang/srt/openai_api/adapter.py | 32 +++++++++++++++++++++--- python/sglang/srt/openai_api/protocol.py | 1 + python/sglang/srt/reasoning_parser.py | 26 ++++++++++++++++++- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index d13ad5ebf..43f591ef2 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -1001,6 +1001,11 @@ def v1_chat_generate_request( tokenize=True, add_generation_prompt=True, tools=tools, + **( + request.chat_template_kwargs + if request.chat_template_kwargs + else {} + ), ) except: # This except branch will be triggered when the chosen model @@ -1012,6 +1017,11 @@ def v1_chat_generate_request( tokenize=True, add_generation_prompt=True, tools=tools, + **( + request.chat_template_kwargs + if request.chat_template_kwargs + else {} + ), ) if assistant_prefix: @@ -1245,16 +1255,34 @@ def v1_chat_generate_response( tool_calls = None text = ret_item["text"] + enable_thinking = True if isinstance(request, list): tool_choice = request[idx].tool_choice tools = request[idx].tools separate_reasoning = request[idx].separate_reasoning + + if ( + request[idx].chat_template_kwargs + and request[idx].chat_template_kwargs.get("enable_thinking") is not None + ): + enable_thinking = request[idx].chat_template_kwargs.get( + "enable_thinking", True + ) else: tool_choice = request.tool_choice tools = 
request.tools separate_reasoning = request.separate_reasoning - if reasoning_parser and separate_reasoning: + if ( + request.chat_template_kwargs + and request.chat_template_kwargs.get("enable_thinking") is not None + ): + enable_thinking = request.chat_template_kwargs.get( + "enable_thinking", True + ) + + reasoning_text = None + if reasoning_parser and separate_reasoning and enable_thinking: try: parser = ReasoningParser( model_type=reasoning_parser, stream_reasoning=False @@ -1266,8 +1294,6 @@ def v1_chat_generate_response( HTTPStatus.BAD_REQUEST, "Failed to parse reasoning related info to json format!", ) - else: - reasoning_text = None if tool_choice != "none" and tools: parser = FunctionCallParser(tools, tool_call_parser) diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py index 33644dd11..890f10d1f 100644 --- a/python/sglang/srt/openai_api/protocol.py +++ b/python/sglang/srt/openai_api/protocol.py @@ -361,6 +361,7 @@ class ChatCompletionRequest(BaseModel): session_params: Optional[Dict] = None separate_reasoning: bool = True stream_reasoning: bool = True + chat_template_kwargs: Optional[Dict] = None # For PD disaggregation bootstrap_host: Optional[str] = None diff --git a/python/sglang/srt/reasoning_parser.py b/python/sglang/srt/reasoning_parser.py index 22a73fbe2..977e26d3e 100644 --- a/python/sglang/srt/reasoning_parser.py +++ b/python/sglang/srt/reasoning_parser.py @@ -117,6 +117,29 @@ class DeepSeekR1Detector(BaseReasoningFormatDetector): # https://github.com/sgl-project/sglang/pull/3202#discussion_r1950153599 +class Qwen3Detector(BaseReasoningFormatDetector): + """ + Detector for Qwen3 model. + Assumes reasoning format: + (<think>)*(.*)</think> + Returns all the text before the </think> tag as `reasoning_text` + and the rest of the text as `normal_text`. + + Args: + stream_reasoning (bool): If False, accumulates reasoning content until the end tag. + If True, streams reasoning content as it arrives. 
+ """ + def __init__(self, stream_reasoning: bool = True): + # Qwen3 is assumed to be reasoning until `</think>` token + super().__init__( + "<think>", + "</think>", + force_reasoning=True, + stream_reasoning=stream_reasoning, + ) + + class ReasoningParser: """ Parser that handles both streaming and non-streaming scenarios for extracting @@ -129,7 +152,8 @@ class ReasoningParser: """ DetectorMap: Dict[str, BaseReasoningFormatDetector] = { - "deepseek-r1": DeepSeekR1Detector + "deepseek-r1": DeepSeekR1Detector, + "qwen3": Qwen3Detector, } def __init__(self, model_type: str = None, stream_reasoning: bool = True):