Revert "bugfix: Fix output_ids extraction in detokenizer_manager" (#9467)
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
|
||||
# Copied from vLLM
|
||||
import json
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
@@ -83,6 +83,14 @@ class HarmonyContext(ConversationContext):
|
||||
if isinstance(output, dict) and "output_ids" in output:
|
||||
output_token_ids = output["output_ids"]
|
||||
|
||||
# TODO: REMOVE here:
|
||||
# Very hacky, find the first occurrence of token 200006 and cut from there
|
||||
try:
|
||||
start_index = output_token_ids.index(200006)
|
||||
output_token_ids = output_token_ids[start_index:]
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
for token_id in output_token_ids:
|
||||
self.parser.process(token_id)
|
||||
output_msgs = self.parser.messages
|
||||
@@ -190,6 +198,15 @@ class StreamingHarmonyContext(HarmonyContext):
|
||||
# RequestOutput from SGLang with outputs
|
||||
output_token_ids = output["output_ids"]
|
||||
|
||||
# TODO: REMOVE here:
|
||||
# Very hacky, find the first occurrence of token 200006 and cut from there
|
||||
# Find the first occurrence of token 200006 and cut from there
|
||||
try:
|
||||
start_index = output_token_ids.index(200006)
|
||||
output_token_ids = output_token_ids[start_index:]
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
for token_id in output_token_ids:
|
||||
self.parser.process(token_id)
|
||||
|
||||
|
||||
@@ -216,7 +216,7 @@ class DetokenizerManager:
|
||||
rids=recv_obj.rids,
|
||||
finished_reasons=recv_obj.finished_reasons,
|
||||
output_strs=output_strs,
|
||||
output_ids=recv_obj.output_ids,
|
||||
output_ids=recv_obj.decode_ids,
|
||||
prompt_tokens=recv_obj.prompt_tokens,
|
||||
completion_tokens=recv_obj.completion_tokens,
|
||||
cached_tokens=recv_obj.cached_tokens,
|
||||
|
||||
Reference in New Issue
Block a user