Constraint Decoding: Tool call with text (#4067)

This commit is contained in:
Xihuai Wang
2025-03-17 16:06:46 +08:00
committed by GitHub
parent ef3c2dd08e
commit 927ca935a7
4 changed files with 143 additions and 102 deletions

View File

@@ -1130,7 +1130,7 @@ def v1_chat_generate_response(
finish_reason["type"] = "tool_calls"
finish_reason["matched"] = None
try:
full_normal_text, call_info_list = parser.parse_non_stream(text)
text, call_info_list = parser.parse_non_stream(text)
tool_calls = [
ToolCall(
id=str(call_info.tool_index),
@@ -1153,9 +1153,9 @@ def v1_chat_generate_response(
"index": 0,
"message": {
"role": "assistant",
"content": text if tool_calls is None else None,
"content": text if text else None,
"tool_calls": tool_calls,
"reasoning_content": reasoning_text,
"reasoning_content": reasoning_text if reasoning_text else None,
},
"logprobs": choice_logprobs.model_dump() if choice_logprobs else None,
"finish_reason": (finish_reason["type"] if finish_reason else ""),
@@ -1170,9 +1170,9 @@ def v1_chat_generate_response(
index=idx,
message=ChatMessage(
role="assistant",
content=text if tool_calls is None else None,
content=text if text else None,
tool_calls=tool_calls,
reasoning_content=reasoning_text,
reasoning_content=reasoning_text if reasoning_text else None,
),
logprobs=choice_logprobs,
finish_reason=(finish_reason["type"] if finish_reason else ""),
@@ -1317,9 +1317,11 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
tokenizer_manager.server_args.reasoning_parser
and request.separate_reasoning
):
delta = DeltaMessage(role="assistant", reasoning_content="")
delta = DeltaMessage(
role="assistant", reasoning_content=None
)
else:
delta = DeltaMessage(role="assistant", content="")
delta = DeltaMessage(role="assistant", content=None)
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=delta,
@@ -1362,7 +1364,11 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
if reasoning_text:
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=DeltaMessage(reasoning_content=reasoning_text),
delta=DeltaMessage(
reasoning_content=(
reasoning_text if reasoning_text else None
)
),
finish_reason=(
None
if finish_reason_type
@@ -1396,7 +1402,9 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
if normal_text:
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=DeltaMessage(content=normal_text),
delta=DeltaMessage(
content=normal_text if normal_text else None
),
finish_reason=(
None
if finish_reason_type
@@ -1468,7 +1476,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
# No tool calls => just treat this as normal text
choice_data = ChatCompletionResponseStreamChoice(
index=index,
delta=DeltaMessage(content=delta),
delta=DeltaMessage(content=delta if delta else None),
finish_reason=(
None
if finish_reason_type and len(finish_reason_type) == 0