Revert "bugfix: Fix output_ids extraction in detokenizer_manager" (#9467)

2025-08-22 08:24:25 +08:00
parent 0f9318f7d0
commit 6c855db82c
2 changed files with 19 additions and 2 deletions
--- a/python/sglang/srt/entrypoints/context.py
+++ b/python/sglang/srt/entrypoints/context.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
+# Copied from vLLM
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -83,6 +83,14 @@ class HarmonyContext(ConversationContext):
        if isinstance(output, dict) and "output_ids" in output:
            output_token_ids = output["output_ids"]

+            # TODO: Remove this workaround.
+            # Very hacky: drop every token before the first occurrence of token 200006.
+            try:
+                start_index = output_token_ids.index(200006)
+                output_token_ids = output_token_ids[start_index:]
+            except ValueError:
+                pass
+
            for token_id in output_token_ids:
                self.parser.process(token_id)
            output_msgs = self.parser.messages
@@ -190,6 +198,15 @@ class StreamingHarmonyContext(HarmonyContext):
            # RequestOutput from SGLang with outputs
            output_token_ids = output["output_ids"]

+            # TODO: Remove this workaround.
+            # Very hacky: find the first occurrence of token 200006 and drop
+            # every token that precedes it (no-op when 200006 is absent).
+            try:
+                start_index = output_token_ids.index(200006)
+                output_token_ids = output_token_ids[start_index:]
+            except ValueError:
+                pass
+
            for token_id in output_token_ids:
                self.parser.process(token_id)

--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -216,7 +216,7 @@ class DetokenizerManager:
            rids=recv_obj.rids,
            finished_reasons=recv_obj.finished_reasons,
            output_strs=output_strs,
-            output_ids=recv_obj.output_ids,
+            output_ids=recv_obj.decode_ids,
            prompt_tokens=recv_obj.prompt_tokens,
            completion_tokens=recv_obj.completion_tokens,
            cached_tokens=recv_obj.cached_tokens,