diff --git a/python/sglang/srt/entrypoints/openai/serving_chat.py b/python/sglang/srt/entrypoints/openai/serving_chat.py index db7bc252c..51a4bd327 100644 --- a/python/sglang/srt/entrypoints/openai/serving_chat.py +++ b/python/sglang/srt/entrypoints/openai/serving_chat.py @@ -840,11 +840,15 @@ class OpenAIServingChat(OpenAIServingBase): reasoning_text = None reasoning_parser = self.tokenizer_manager.server_args.reasoning_parser if reasoning_parser and request.separate_reasoning: + is_force_reasoning = ( + self.template_manager.force_reasoning + or self._get_enable_thinking_from_request(request) + ) try: parser = ReasoningParser( model_type=reasoning_parser, stream_reasoning=False, - force_reasoning=self.template_manager.force_reasoning, + force_reasoning=is_force_reasoning, ) reasoning_text, text = parser.parse_non_stream(text) except Exception as e: @@ -1007,10 +1011,14 @@ class OpenAIServingChat(OpenAIServingBase): ) -> tuple[Optional[str], str]: """Process reasoning content in streaming response""" if index not in reasoning_parser_dict: + is_force_reasoning = ( + self.template_manager.force_reasoning + or self._get_enable_thinking_from_request(request) + ) reasoning_parser_dict[index] = ReasoningParser( self.tokenizer_manager.server_args.reasoning_parser, request.stream_reasoning, - self.template_manager.force_reasoning, + is_force_reasoning, ) reasoning_parser = reasoning_parser_dict[index] return reasoning_parser.parse_stream_chunk(delta)