bugfix: Fix the commentary msg extraction in GptOssDetector (#9097)

This commit is contained in:
Chang Su
2025-08-11 23:53:10 -07:00
committed by GitHub
parent b4ac2b9c0c
commit 9c83d74da3

View File

@@ -295,15 +295,15 @@ class GptOssDetector(BaseReasoningFormatDetector):
for match in reversed(list(commentary_pattern.finditer(full_normal_text))): for match in reversed(list(commentary_pattern.finditer(full_normal_text))):
# Check if this commentary is a tool call by looking at the text before <|message|> # Check if this commentary is a tool call by looking at the text before <|message|>
match_start = match.start() match_start = match.start()
# Find the start of this commentary section # Find where "<|channel|>commentary" starts within the matched pattern
commentary_start = full_normal_text.rfind( # The pattern starts with "<|start|>assistant<|channel|>commentary"
"<|channel|>commentary", 0, match_start # So we look for the text between "commentary" and "<|message|>" in the match
) match_text = full_normal_text[match_start : match.end()]
if commentary_start != -1: commentary_idx = match_text.find("<|channel|>commentary")
# Extract text between "commentary" and "<|message|>" if commentary_idx != -1:
message_pos = full_normal_text.find("<|message|>", commentary_start) message_idx = match_text.find("<|message|>", commentary_idx)
if message_pos != -1: if message_idx != -1:
between_text = full_normal_text[commentary_start:message_pos] between_text = match_text[commentary_idx:message_idx]
# If no "to=" found, this is regular commentary (reasoning content) # If no "to=" found, this is regular commentary (reasoning content)
if " to=" not in between_text: if " to=" not in between_text:
content = match.group(1).strip() content = match.group(1).strip()