bugfix: Fix output_ids extraction in detokenizer_manager (#9047)

2025-08-11 03:17:32 -07:00
parent f4ae50e97c
commit a6452b7188
2 changed files with 2 additions and 19 deletions
--- a/python/sglang/srt/entrypoints/context.py
+++ b/python/sglang/srt/entrypoints/context.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copied from vLLM
+# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -83,14 +83,6 @@ class HarmonyContext(ConversationContext):
        if isinstance(output, dict) and "output_ids" in output:
            output_token_ids = output["output_ids"]
-            # TODO: REMOVE here:
-            # Very hacky, find the first occurrence of token 200006 and cut from there
-            try:
-                start_index = output_token_ids.index(200006)
-                output_token_ids = output_token_ids[start_index:]
-            except ValueError:
-                pass
            for token_id in output_token_ids:
                self.parser.process(token_id)
            output_msgs = self.parser.messages
@@ -196,15 +188,6 @@ class StreamingHarmonyContext(HarmonyContext):
            # RequestOutput from SGLang with outputs
            output_token_ids = output["output_ids"]
-            # TODO: REMOVE here:
-            # Very hacky, find the first occurrence of token 200006 and cut from there
-            # Find the first occurrence of token 200006 and cut from there
-            try:
-                start_index = output_token_ids.index(200006)
-                output_token_ids = output_token_ids[start_index:]
-            except ValueError:
-                pass
            for token_id in output_token_ids:
                self.parser.process(token_id)
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -216,7 +216,7 @@ class DetokenizerManager:
            rids=recv_obj.rids,
            finished_reasons=recv_obj.finished_reasons,
            output_strs=output_strs,
-            output_ids=recv_obj.decode_ids,
+            output_ids=recv_obj.output_ids,
            prompt_tokens=recv_obj.prompt_tokens,
            completion_tokens=recv_obj.completion_tokens,
            cached_tokens=recv_obj.cached_tokens,