bugfix: Fix output_ids extraction in detokenizer_manager (#9047)
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# Copied from vLLM
|
# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
@@ -83,14 +83,6 @@ class HarmonyContext(ConversationContext):
|
|||||||
if isinstance(output, dict) and "output_ids" in output:
|
if isinstance(output, dict) and "output_ids" in output:
|
||||||
output_token_ids = output["output_ids"]
|
output_token_ids = output["output_ids"]
|
||||||
|
|
||||||
# TODO: REMOVE here:
|
|
||||||
# Very hacky, find the first occurrence of token 200006 and cut from there
|
|
||||||
try:
|
|
||||||
start_index = output_token_ids.index(200006)
|
|
||||||
output_token_ids = output_token_ids[start_index:]
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
for token_id in output_token_ids:
|
for token_id in output_token_ids:
|
||||||
self.parser.process(token_id)
|
self.parser.process(token_id)
|
||||||
output_msgs = self.parser.messages
|
output_msgs = self.parser.messages
|
||||||
@@ -196,15 +188,6 @@ class StreamingHarmonyContext(HarmonyContext):
|
|||||||
# RequestOutput from SGLang with outputs
|
# RequestOutput from SGLang with outputs
|
||||||
output_token_ids = output["output_ids"]
|
output_token_ids = output["output_ids"]
|
||||||
|
|
||||||
# TODO: REMOVE here:
|
|
||||||
# Very hacky, find the first occurrence of token 200006 and cut from there
|
|
||||||
# Find the first occurrence of token 200006 and cut from there
|
|
||||||
try:
|
|
||||||
start_index = output_token_ids.index(200006)
|
|
||||||
output_token_ids = output_token_ids[start_index:]
|
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
for token_id in output_token_ids:
|
for token_id in output_token_ids:
|
||||||
self.parser.process(token_id)
|
self.parser.process(token_id)
|
||||||
|
|
||||||
|
|||||||
@@ -216,7 +216,7 @@ class DetokenizerManager:
|
|||||||
rids=recv_obj.rids,
|
rids=recv_obj.rids,
|
||||||
finished_reasons=recv_obj.finished_reasons,
|
finished_reasons=recv_obj.finished_reasons,
|
||||||
output_strs=output_strs,
|
output_strs=output_strs,
|
||||||
output_ids=recv_obj.decode_ids,
|
output_ids=recv_obj.output_ids,
|
||||||
prompt_tokens=recv_obj.prompt_tokens,
|
prompt_tokens=recv_obj.prompt_tokens,
|
||||||
completion_tokens=recv_obj.completion_tokens,
|
completion_tokens=recv_obj.completion_tokens,
|
||||||
cached_tokens=recv_obj.cached_tokens,
|
cached_tokens=recv_obj.cached_tokens,
|
||||||
|
|||||||
Reference in New Issue
Block a user