From 3e7ff1ab1fe8f97b22e16c2c3ce15f83d3a8618a Mon Sep 17 00:00:00 2001 From: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com> Date: Thu, 7 Aug 2025 15:52:06 -0700 Subject: [PATCH] fix: reasoning parser when request has enable_thinking flag (#8933) Signed-off-by: Xinyuan Tong --- python/sglang/srt/entrypoints/openai/serving_chat.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index db7bc252c..51a4bd327 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -840,11 +840,15 @@ class OpenAIServingChat(OpenAIServingBase): reasoning_text = None reasoning_parser = self.tokenizer_manager.server_args.reasoning_parser if reasoning_parser and request.separate_reasoning: + is_force_reasoning = ( + self.template_manager.force_reasoning + or self._get_enable_thinking_from_request(request) + ) try: parser = ReasoningParser( model_type=reasoning_parser, stream_reasoning=False, - force_reasoning=self.template_manager.force_reasoning, + force_reasoning=is_force_reasoning, ) reasoning_text, text = parser.parse_non_stream(text) except Exception as e: @@ -1007,10 +1011,14 @@ class OpenAIServingChat(OpenAIServingBase): ) -> tuple[Optional[str], str]: """Process reasoning content in streaming response""" if index not in reasoning_parser_dict: + is_force_reasoning = ( + self.template_manager.force_reasoning + or self._get_enable_thinking_from_request(request) + ) reasoning_parser_dict[index] = ReasoningParser( self.tokenizer_manager.server_args.reasoning_parser, request.stream_reasoning, - self.template_manager.force_reasoning, + is_force_reasoning, ) reasoning_parser = reasoning_parser_dict[index] return reasoning_parser.parse_stream_chunk(delta)