feat(oai refactor): Replace openai_api with entrypoints/openai (#7351)

Co-authored-by: Jin Pan <jpan236@wisc.edu>
This commit is contained in:
Chang Su
2025-06-21 13:21:06 -07:00
committed by GitHub
parent 02bf31ef29
commit 72676cd6c0
43 changed files with 674 additions and 4555 deletions

View File

@@ -1,4 +1,4 @@
from typing import Dict, Tuple
from typing import Dict, Optional, Tuple, Type
class StreamingParseResult:
@@ -32,17 +32,26 @@ class BaseReasoningFormatDetector:
One-time parsing: Detects and parses reasoning sections in the provided text.
Returns both reasoning content and normal text separately.
"""
text = text.replace(self.think_start_token, "").strip()
if self.think_end_token not in text:
in_reasoning = self._in_reasoning or text.startswith(self.think_start_token)
if not in_reasoning:
return StreamingParseResult(normal_text=text)
# The text is considered to be in a reasoning block.
processed_text = text.replace(self.think_start_token, "").strip()
if self.think_end_token not in processed_text:
# Assume reasoning was truncated before `</think>` token
return StreamingParseResult(reasoning_text=text)
return StreamingParseResult(reasoning_text=processed_text)
# Extract reasoning content
splits = text.split(self.think_end_token, maxsplit=1)
splits = processed_text.split(self.think_end_token, maxsplit=1)
reasoning_text = splits[0]
text = splits[1].strip()
normal_text = splits[1].strip()
return StreamingParseResult(normal_text=text, reasoning_text=reasoning_text)
return StreamingParseResult(
normal_text=normal_text, reasoning_text=reasoning_text
)
def parse_streaming_increment(self, new_text: str) -> StreamingParseResult:
"""
@@ -61,6 +70,7 @@ class BaseReasoningFormatDetector:
if not self.stripped_think_start and self.think_start_token in current_text:
current_text = current_text.replace(self.think_start_token, "")
self.stripped_think_start = True
self._in_reasoning = True
# Handle end of reasoning block
if self._in_reasoning and self.think_end_token in current_text:
@@ -131,11 +141,11 @@ class Qwen3Detector(BaseReasoningFormatDetector):
"""
def __init__(self, stream_reasoning: bool = True):
# Qwen3 is assumed to be reasoning until `</think>` token
# Qwen3 won't be in reasoning mode when user passes `enable_thinking=False`
super().__init__(
"<think>",
"</think>",
force_reasoning=True,
force_reasoning=False,
stream_reasoning=stream_reasoning,
)
@@ -151,12 +161,12 @@ class ReasoningParser:
If True, streams reasoning content as it arrives.
"""
DetectorMap: Dict[str, BaseReasoningFormatDetector] = {
DetectorMap: Dict[str, Type[BaseReasoningFormatDetector]] = {
"deepseek-r1": DeepSeekR1Detector,
"qwen3": Qwen3Detector,
}
def __init__(self, model_type: str = None, stream_reasoning: bool = True):
def __init__(self, model_type: Optional[str] = None, stream_reasoning: bool = True):
if not model_type:
raise ValueError("Model type must be specified")