diff --git a/python/sglang/srt/entrypoints/context.py b/python/sglang/srt/entrypoints/context.py index e7a0c07cf..ae4605374 100644 --- a/python/sglang/srt/entrypoints/context.py +++ b/python/sglang/srt/entrypoints/context.py @@ -1,5 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# Copied from vLLM +# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py import json import logging from abc import ABC, abstractmethod @@ -83,14 +83,6 @@ class HarmonyContext(ConversationContext): if isinstance(output, dict) and "output_ids" in output: output_token_ids = output["output_ids"] - # TODO: REMOVE here: - # Very hacky, find the first occurrence of token 200006 and cut from there - try: - start_index = output_token_ids.index(200006) - output_token_ids = output_token_ids[start_index:] - except ValueError: - pass - for token_id in output_token_ids: self.parser.process(token_id) output_msgs = self.parser.messages @@ -196,15 +188,6 @@ class StreamingHarmonyContext(HarmonyContext): # RequestOutput from SGLang with outputs output_token_ids = output["output_ids"] - # TODO: REMOVE here: - # Very hacky, find the first occurrence of token 200006 and cut from there - # Find the first occurrence of token 200006 and cut from there - try: - start_index = output_token_ids.index(200006) - output_token_ids = output_token_ids[start_index:] - except ValueError: - pass - for token_id in output_token_ids: self.parser.process(token_id) diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index 29757b4b2..34a29ec17 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -216,7 +216,7 @@ class DetokenizerManager: rids=recv_obj.rids, finished_reasons=recv_obj.finished_reasons, output_strs=output_strs, - output_ids=recv_obj.decode_ids, + output_ids=recv_obj.output_ids, prompt_tokens=recv_obj.prompt_tokens, completion_tokens=recv_obj.completion_tokens, cached_tokens=recv_obj.cached_tokens,