Constraint Decoding: Tool call with text (#4067)
This commit is contained in:
@@ -1130,7 +1130,7 @@ def v1_chat_generate_response(
|
||||
finish_reason["type"] = "tool_calls"
|
||||
finish_reason["matched"] = None
|
||||
try:
|
||||
full_normal_text, call_info_list = parser.parse_non_stream(text)
|
||||
text, call_info_list = parser.parse_non_stream(text)
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=str(call_info.tool_index),
|
||||
@@ -1153,9 +1153,9 @@ def v1_chat_generate_response(
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": text if tool_calls is None else None,
|
||||
"content": text if text else None,
|
||||
"tool_calls": tool_calls,
|
||||
"reasoning_content": reasoning_text,
|
||||
"reasoning_content": reasoning_text if reasoning_text else None,
|
||||
},
|
||||
"logprobs": choice_logprobs.model_dump() if choice_logprobs else None,
|
||||
"finish_reason": (finish_reason["type"] if finish_reason else ""),
|
||||
@@ -1170,9 +1170,9 @@ def v1_chat_generate_response(
|
||||
index=idx,
|
||||
message=ChatMessage(
|
||||
role="assistant",
|
||||
content=text if tool_calls is None else None,
|
||||
content=text if text else None,
|
||||
tool_calls=tool_calls,
|
||||
reasoning_content=reasoning_text,
|
||||
reasoning_content=reasoning_text if reasoning_text else None,
|
||||
),
|
||||
logprobs=choice_logprobs,
|
||||
finish_reason=(finish_reason["type"] if finish_reason else ""),
|
||||
@@ -1317,9 +1317,11 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
|
||||
tokenizer_manager.server_args.reasoning_parser
|
||||
and request.separate_reasoning
|
||||
):
|
||||
delta = DeltaMessage(role="assistant", reasoning_content="")
|
||||
delta = DeltaMessage(
|
||||
role="assistant", reasoning_content=None
|
||||
)
|
||||
else:
|
||||
delta = DeltaMessage(role="assistant", content="")
|
||||
delta = DeltaMessage(role="assistant", content=None)
|
||||
choice_data = ChatCompletionResponseStreamChoice(
|
||||
index=index,
|
||||
delta=delta,
|
||||
@@ -1362,7 +1364,11 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
|
||||
if reasoning_text:
|
||||
choice_data = ChatCompletionResponseStreamChoice(
|
||||
index=index,
|
||||
delta=DeltaMessage(reasoning_content=reasoning_text),
|
||||
delta=DeltaMessage(
|
||||
reasoning_content=(
|
||||
reasoning_text if reasoning_text else None
|
||||
)
|
||||
),
|
||||
finish_reason=(
|
||||
None
|
||||
if finish_reason_type
|
||||
@@ -1396,7 +1402,9 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
|
||||
if normal_text:
|
||||
choice_data = ChatCompletionResponseStreamChoice(
|
||||
index=index,
|
||||
delta=DeltaMessage(content=normal_text),
|
||||
delta=DeltaMessage(
|
||||
content=normal_text if normal_text else None
|
||||
),
|
||||
finish_reason=(
|
||||
None
|
||||
if finish_reason_type
|
||||
@@ -1468,7 +1476,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
|
||||
# No tool calls => just treat this as normal text
|
||||
choice_data = ChatCompletionResponseStreamChoice(
|
||||
index=index,
|
||||
delta=DeltaMessage(content=delta),
|
||||
delta=DeltaMessage(content=delta if delta else None),
|
||||
finish_reason=(
|
||||
None
|
||||
if finish_reason_type and len(finish_reason_type) == 0
|
||||
|
||||
Reference in New Issue
Block a user