Align finish reason and stream mode in openai api (#4388)
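Before this change, the OpenAI-compatible endpoints filled finish_reason with an empty string ("") whenever a request was still in flight, the first streamed chunk primed delta fields with explicit None values, streamed tool calls ended with the nonstandard reason "tool_call", and the final usage chunk carried no choices. The hunks below switch the unfinished value to None (JSON null), collapse the first-chunk construction, report "tool_calls" for tool-call streams, defer the terminal finish reason to the final chunk when stream_options.include_usage is set, and narrow the protocol models to the Literal values the OpenAI API defines.

A minimal sketch of why null matters to standard clients (plain json, no sglang imports; the dict shape mirrors the choice payloads in the hunks below):

import json

# OpenAI semantics: finish_reason is JSON null while generation is in
# flight and a terminal string ("stop", "length", ...) once it ends.
in_flight = {"index": 0, "text": "Hello", "finish_reason": None}
finished = {"index": 0, "text": "Hello!", "finish_reason": "stop"}

print(json.dumps(in_flight))
# {"index": 0, "text": "Hello", "finish_reason": null}

# Clients typically branch on these values; the old "" matched neither case.
for choice in (in_flight, finished):
    if choice["finish_reason"] is None:
        print("still generating")
    elif choice["finish_reason"] == "stop":
        print("hit a natural stop")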
@@ -645,7 +645,7 @@ def v1_generate_response
             "index": 0,
             "text": text,
             "logprobs": logprobs,
-            "finish_reason": (finish_reason["type"] if finish_reason else ""),
+            "finish_reason": finish_reason["type"] if finish_reason else None,
             "matched_stop": (
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -657,7 +657,7 @@ def v1_generate_response(
             index=idx,
             text=text,
             logprobs=logprobs,
-            finish_reason=(finish_reason["type"] if finish_reason else ""),
+            finish_reason=finish_reason["type"] if finish_reason else None,
             matched_stop=(
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -805,7 +805,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
                     index=index,
                     text=delta,
                     logprobs=logprobs,
-                    finish_reason=(finish_reason["type"] if finish_reason else ""),
+                    finish_reason=finish_reason["type"] if finish_reason else None,
                     matched_stop=(
                         finish_reason["matched"]
                         if finish_reason and "matched" in finish_reason
@@ -1216,7 +1216,7 @@ def v1_chat_generate_response(
                 "reasoning_content": reasoning_text if reasoning_text else None,
             },
             "logprobs": choice_logprobs.model_dump() if choice_logprobs else None,
-            "finish_reason": (finish_reason["type"] if finish_reason else ""),
+            "finish_reason": finish_reason["type"] if finish_reason else None,
             "matched_stop": (
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -1233,7 +1233,7 @@ def v1_chat_generate_response(
                 reasoning_content=reasoning_text if reasoning_text else None,
             ),
             logprobs=choice_logprobs,
-            finish_reason=(finish_reason["type"] if finish_reason else ""),
+            finish_reason=finish_reason["type"] if finish_reason else None,
             matched_stop=(
                 finish_reason["matched"]
                 if finish_reason and "matched" in finish_reason
@@ -1377,23 +1377,11 @@ async def v1_chat_completions(
                 if is_first:
                     # First chunk with role
                     is_first = False
-                    if (
-                        tokenizer_manager.server_args.reasoning_parser
-                        and request.separate_reasoning
-                    ):
-                        delta = DeltaMessage(
-                            role="assistant", reasoning_content=None
-                        )
-                    else:
-                        delta = DeltaMessage(role="assistant", content=None)
+                    delta = DeltaMessage(role="assistant")
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=index,
                         delta=delta,
-                        finish_reason=(
-                            None
-                            if finish_reason_type and len(finish_reason_type) == 0
-                            else finish_reason_type
-                        ),
+                        finish_reason=finish_reason_type,
                         matched_stop=(
                             finish_reason["matched"]
                             if finish_reason and "matched" in finish_reason
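The collapsed first-chunk branch relies on pydantic serializing an explicitly-None field the same way as an unset one, so the reasoning/content distinction was dead code. A quick check with a simplified stand-in model (not the real DeltaMessage):

from typing import Optional
from pydantic import BaseModel

class Delta(BaseModel):  # simplified stand-in for DeltaMessage
    role: Optional[str] = None
    content: Optional[str] = None
    reasoning_content: Optional[str] = None

# Passing content=None explicitly produces the same JSON as leaving the
# field at its default, so a single constructor call suffices.
assert (
    Delta(role="assistant", content=None).model_dump_json()
    == Delta(role="assistant").model_dump_json()
)
print(Delta(role="assistant").model_dump_json())
# {"role":"assistant","content":null,"reasoning_content":null}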
@@ -1434,12 +1422,7 @@ async def v1_chat_completions(
                             reasoning_text if reasoning_text else None
                         )
                     ),
-                    finish_reason=(
-                        None
-                        if finish_reason_type
-                        and len(finish_reason_type) == 0
-                        else finish_reason_type
-                    ),
+                    finish_reason=finish_reason_type,
                 )
                 chunk = ChatCompletionStreamResponse(
                     id=content["meta_info"]["id"],
@@ -1471,12 +1454,7 @@ async def v1_chat_completions(
                     delta=DeltaMessage(
                         content=normal_text if normal_text else None
                     ),
-                    finish_reason=(
-                        None
-                        if finish_reason_type
-                        and len(finish_reason_type) == 0
-                        else finish_reason_type
-                    ),
+                    finish_reason=finish_reason_type,
                 )
                 chunk = ChatCompletionStreamResponse(
                     id=content["meta_info"]["id"],
@@ -1490,11 +1468,7 @@ async def v1_chat_completions(
                 for call_item in calls:
                     # transform call_item -> FunctionResponse + ToolCall

-                    if (
-                        content["meta_info"]["finish_reason"]
-                        and content["meta_info"]["finish_reason"]["type"]
-                        == "stop"
-                    ):
+                    if finish_reason_type == "stop":
                         latest_delta_len = 0
                         if isinstance(call_item.parameters, str):
                             latest_delta_len = len(call_item.parameters)
@@ -1515,6 +1489,8 @@ async def v1_chat_completions(
                         )
                         call_item.parameters = remaining_call

+                        finish_reason_type = "tool_calls"
+
                     tool_call = ToolCall(
                         id=str(call_item.tool_index),
                         function=FunctionResponse(
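Forcing finish_reason_type to "tool_calls" once call items were parsed from a "stop" finish mirrors OpenAI behavior: a generation that ends because the model emitted tool calls must not report "stop". A hypothetical helper expressing that mapping (illustrative only, not part of the diff):

from typing import Optional

def normalize_finish_reason(raw: Optional[str], emitted_tool_calls: bool) -> Optional[str]:
    """Illustrative: map an internal finish reason to the OpenAI value.

    A 'stop' that coincides with parsed tool calls is reported as
    'tool_calls'; anything still in flight stays None.
    """
    if raw is None:
        return None
    if raw == "stop" and emitted_tool_calls:
        return "tool_calls"
    return raw

assert normalize_finish_reason("stop", emitted_tool_calls=True) == "tool_calls"
assert normalize_finish_reason("stop", emitted_tool_calls=False) == "stop"
assert normalize_finish_reason(None, emitted_tool_calls=False) is None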
@@ -1524,10 +1500,13 @@ async def v1_chat_completions(
                     )
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=index,
-                        delta=DeltaMessage(
-                            role="assistant", tool_calls=[tool_call]
-                        ),
-                        finish_reason="tool_call",
+                        delta=DeltaMessage(tool_calls=[tool_call]),
+                        finish_reason=(
+                            None
+                            if request.stream_options
+                            and request.stream_options.include_usage
+                            else finish_reason_type
+                        ),  # additional chunk will be returned
                     )
                     chunk = ChatCompletionStreamResponse(
                         id=content["meta_info"]["id"],
@@ -1542,30 +1521,44 @@ async def v1_chat_completions(

                 else:
                     # No tool calls => just treat this as normal text
-                    choice_data = ChatCompletionResponseStreamChoice(
-                        index=index,
-                        delta=DeltaMessage(content=delta if delta else None),
-                        finish_reason=(
-                            None
-                            if finish_reason_type and len(finish_reason_type) == 0
-                            else finish_reason_type
-                        ),
-                        matched_stop=(
-                            finish_reason["matched"]
-                            if finish_reason and "matched" in finish_reason
-                            else None
-                        ),
-                        logprobs=choice_logprobs,
-                    )
-                    chunk = ChatCompletionStreamResponse(
-                        id=content["meta_info"]["id"],
-                        created=created,
-                        choices=[choice_data],
-                        model=request.model,
-                    )
-                    yield f"data: {chunk.model_dump_json()}\n\n"
-                    stream_buffers[index] = new_stream_buffer
-                    is_firsts[index] = is_first
+                    if delta or not (
+                        request.stream_options
+                        and request.stream_options.include_usage
+                    ):
+                        choice_data = ChatCompletionResponseStreamChoice(
+                            index=index,
+                            delta=DeltaMessage(content=delta if delta else None),
+                            finish_reason=(
+                                None
+                                if request.stream_options
+                                and request.stream_options.include_usage
+                                else finish_reason_type
+                            ),
+                            matched_stop=(
+                                finish_reason["matched"]
+                                if finish_reason and "matched" in finish_reason
+                                else None
+                            ),
+                            logprobs=choice_logprobs,
+                        )
+                        chunk = ChatCompletionStreamResponse(
+                            id=content["meta_info"]["id"],
+                            created=created,
+                            choices=[choice_data],
+                            model=request.model,
+                        )
+                        yield f"data: {chunk.model_dump_json()}\n\n"
+                    stream_buffers[index] = new_stream_buffer
+                    is_firsts[index] = is_first
+                    if finish_reason_type == "stop" and request.tool_choice != "none":
+                        parser = FunctionCallParser(
+                            tools=request.tools,
+                            tool_call_parser=tokenizer_manager.server_args.tool_call_parser,
+                        )
+                        if parser.has_tool_call(new_stream_buffer):
+                            # if the stream ends with empty string after tool calls
+                            finish_reason_type = "tool_calls"
+
             if request.stream_options and request.stream_options.include_usage:
                 total_prompt_tokens = sum(
                     tokens
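The new guard means a text chunk is always emitted when there is delta text, while an empty tail chunk is emitted only when the client has not asked for usage; with include_usage set, the terminal finish_reason travels on the final usage chunk instead. The condition in isolation (a sketch with stand-in booleans):

# Truth table for the new guard `if delta or not (stream_options and include_usage)`
# (stand-in values; in the diff these come from the request object):
for delta, include_usage in [("hi", False), ("hi", True), ("", False), ("", True)]:
    emit = bool(delta) or not include_usage
    print(f"delta={delta!r:>5}  include_usage={include_usage}:  emit chunk -> {emit}")
# Only the empty-delta + include_usage case is skipped: its finish_reason
# is deferred to the final usage chunk.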
@@ -1590,17 +1583,22 @@ async def v1_chat_completions(
                     prompt_tokens_details=prompt_tokens_details,
                 )

-                final_usage_chunk = ChatCompletionStreamResponse(
-                    id=content["meta_info"]["id"],
-                    created=created,
-                    choices=[],
-                    model=request.model,
-                    usage=usage,
-                )
-                final_usage_data = final_usage_chunk.model_dump_json(
-                    exclude_none=True
-                )
-                yield f"data: {final_usage_data}\n\n"
+            else:
+                usage = None
+            final_usage_chunk = ChatCompletionStreamResponse(
+                id=content["meta_info"]["id"],
+                created=created,
+                choices=[
+                    ChatCompletionResponseStreamChoice(
+                        index=index,
+                        delta=DeltaMessage(),
+                        finish_reason=finish_reason_type,
+                    )
+                ],
+                model=request.model,
+                usage=usage,
+            )
+            yield f"data: {final_usage_chunk.model_dump_json()}\n\n"
         except ValueError as e:
             error = create_streaming_error_response(str(e))
             yield f"data: {error}\n\n"
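With the reworked tail, every stream ends with a chunk whose choice carries the terminal finish_reason, plus usage when requested. An illustrative client-side consumer (a sketch: the data: framing follows the yields above, while the [DONE] sentinel is emitted by the endpoint outside this excerpt):

import json

def read_stream(sse_lines):
    """Illustrative consumer: exactly one chunk should now carry a
    non-null finish_reason, on the stream's final choice."""
    finish = None
    for line in sse_lines:
        if not line.startswith("data: ") or line == "data: [DONE]":
            continue
        chunk = json.loads(line[len("data: "):])
        for choice in chunk.get("choices", []):
            if choice.get("finish_reason") is not None:
                finish = choice["finish_reason"]
        if chunk.get("usage"):
            print("usage:", chunk["usage"])
    return finish

demo = [
    'data: {"choices": [{"index": 0, "delta": {"content": "Hi"}, "finish_reason": null}]}',
    'data: {"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}',
    "data: [DONE]",
]
assert read_stream(demo) == "stop"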
@@ -187,7 +187,7 @@ class CompletionResponseChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Literal["stop", "length", "content_filter"]
     matched_stop: Union[None, int, str] = None


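Narrowing finish_reason to a required Literal turns the old empty-string placeholder into a hard validation error instead of a silently wrong value. A quick check against a stand-in model mirroring the field above:

from typing import Literal
from pydantic import BaseModel, ValidationError

class Choice(BaseModel):  # stand-in mirroring the finish_reason field above
    finish_reason: Literal["stop", "length", "content_filter"]

print(Choice(finish_reason="stop"))  # accepted

try:
    Choice(finish_reason="")  # the old in-flight placeholder
except ValidationError:
    print("empty string now fails validation")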
@@ -204,7 +204,7 @@ class CompletionResponseStreamChoice(BaseModel):
     index: int
     text: str
     logprobs: Optional[LogProbs] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
     matched_stop: Union[None, int, str] = None


@@ -387,7 +387,9 @@ class ChatCompletionResponseChoice(BaseModel):
     index: int
     message: ChatMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
-    finish_reason: str
+    finish_reason: Literal[
+        "stop", "length", "tool_calls", "content_filter", "function_call"
+    ]
     matched_stop: Union[None, int, str] = None


@@ -411,7 +413,9 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     index: int
     delta: DeltaMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
-    finish_reason: Optional[str] = None
+    finish_reason: Optional[
+        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
+    ] = None
     matched_stop: Union[None, int, str] = None
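The streaming variant stays Optional, so None remains valid on every intermediate chunk while terminal values are constrained to the OpenAI set. A stand-in mirroring that shape:

from typing import Literal, Optional
from pydantic import BaseModel

class StreamChoice(BaseModel):  # stand-in for ChatCompletionResponseStreamChoice
    finish_reason: Optional[
        Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
    ] = None

print(StreamChoice().finish_reason)  # None on intermediate chunks
print(StreamChoice(finish_reason="tool_calls").finish_reason)  # terminal value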