fix: reasoning parser when the request has the enable_thinking flag (#8933)

Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
Xinyuan Tong
2025-08-07 15:52:06 -07:00
committed by GitHub
parent aaf0ad8cdf
commit 3e7ff1ab1f

View File

@@ -840,11 +840,15 @@ class OpenAIServingChat(OpenAIServingBase):
reasoning_text = None
reasoning_parser = self.tokenizer_manager.server_args.reasoning_parser
if reasoning_parser and request.separate_reasoning:
is_force_reasoning = (
self.template_manager.force_reasoning
or self._get_enable_thinking_from_request(request)
)
try:
parser = ReasoningParser(
model_type=reasoning_parser,
stream_reasoning=False,
force_reasoning=self.template_manager.force_reasoning,
force_reasoning=is_force_reasoning,
)
reasoning_text, text = parser.parse_non_stream(text)
except Exception as e:
@@ -1007,10 +1011,14 @@ class OpenAIServingChat(OpenAIServingBase):
) -> tuple[Optional[str], str]:
"""Process reasoning content in streaming response"""
if index not in reasoning_parser_dict:
is_force_reasoning = (
self.template_manager.force_reasoning
or self._get_enable_thinking_from_request(request)
)
reasoning_parser_dict[index] = ReasoningParser(
self.tokenizer_manager.server_args.reasoning_parser,
request.stream_reasoning,
self.template_manager.force_reasoning,
is_force_reasoning,
)
reasoning_parser = reasoning_parser_dict[index]
return reasoning_parser.parse_stream_chunk(delta)