Fix Harmony reasoning parser and auto-separation for gpt-oss models (#9190)

Co-authored-by: Chang Su <chang.s.su@oracle.com> Co-authored-by: Chayenne <zhaochen20@outlook.com> Co-authored-by: zhaochenyang20 <zhaochenyang20@gmail.com> Co-authored-by: minleminzui <2969413251@qq.com> Co-authored-by: maocheng23 <maocheng@berkeley.edu> Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
2025-08-26 00:26:26 +02:00
parent 24a8cee66d
commit a0a77d937b
8 changed files with 1676 additions and 551 deletions
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -106,6 +106,8 @@ class DetokenizerManager:
            ]
        )

+        self.is_tool_call_parser_gpt_oss = server_args.tool_call_parser == "gpt-oss"
+
    def event_loop(self):
        """The event loop that handles requests"""
        while True:
@@ -133,6 +135,9 @@ class DetokenizerManager:

        # Trim stop token.
        if isinstance(matched, int) and isinstance(output, list):
+            # 200012 <|call|> is the tool call token and one of eos tokens for gpt-oss model
+            if output[-1] == 200012 and self.is_tool_call_parser_gpt_oss:
+                return output
            assert len(output) > 0
            return output[:-1]
        return output