Support v1/responses and use harmony in serving_chat (#8837)
Signed-off-by: Xinyuan Tong <justinning0323@outlook.com>
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Xinyuan Tong <justinning0323@outlook.com>
Co-authored-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
This commit is contained in:
@@ -750,7 +750,11 @@ class TokenizerManager:
|
||||
try:
|
||||
await asyncio.wait_for(state.event.wait(), timeout=4)
|
||||
except asyncio.TimeoutError:
|
||||
- if request is not None and await request.is_disconnected():
|
||||
+ if (
|
||||
+ request is not None
|
||||
+ and not obj.background
|
||||
+ and await request.is_disconnected()
|
||||
+ ):
|
||||
# Abort the request for disconnected requests (non-streaming, waiting queue)
|
||||
self.abort_request(obj.rid)
|
||||
# Use exception to kill the whole call stack and asyncio task
|
||||
@@ -805,7 +809,11 @@ class TokenizerManager:
|
||||
if obj.stream:
|
||||
yield out
|
||||
else:
|
||||
- if request is not None and await request.is_disconnected():
|
||||
+ if (
|
||||
+ request is not None
|
||||
+ and not obj.background
|
||||
+ and await request.is_disconnected()
|
||||
+ ):
|
||||
# Abort the request for disconnected requests (non-streaming, running)
|
||||
self.abort_request(obj.rid)
|
||||
# Use exception to kill the whole call stack and asyncio task
|
||||
@@ -1548,8 +1556,17 @@ class TokenizerManager:
|
||||
|
||||
if isinstance(recv_obj, BatchStrOut):
|
||||
state.text += recv_obj.output_strs[i]
|
||||
+ if state.obj.stream:
|
||||
+ state.output_ids.extend(recv_obj.output_ids[i])
|
||||
+ output_token_ids = state.output_ids[state.last_output_offset :]
|
||||
+ state.last_output_offset = len(state.output_ids)
|
||||
+ else:
|
||||
+ state.output_ids.extend(recv_obj.output_ids[i])
|
||||
+ output_token_ids = state.output_ids.copy()
|
||||
|
||||
out_dict = {
|
||||
"text": state.text,
|
||||
+ "output_ids": output_token_ids,
|
||||
"meta_info": meta_info,
|
||||
}
|
||||
elif isinstance(recv_obj, BatchTokenIDOut):
|
||||
|
||||
Reference in New Issue
Block a user