fix serving issues when requesting real data

This commit is contained in:
2026-06-12 17:57:23 +08:00
parent 50e3a05fb0
commit 3b8a567e9e
2 changed files with 36 additions and 8 deletions

View File

@@ -113,6 +113,11 @@ class ConversationMessage(TypedDict, total=False):
tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
"""The tool calls generated by the model, such as function calls."""
reasoning_content: Optional[str]
"""Reasoning / thinking content for assistant messages.
Passed directly to the chat template (Qwen3 reads message.reasoning_content
natively) instead of being manually wrapped in <think>...</think>."""
# Closed set of modality names, expressed as a Literal type so that type
# checkers reject any string other than "image", "audio" or "video".
ModalityStr = Literal["image", "audio", "video"]
# Unconstrained generic type variable for use in annotations in this module.
_T = TypeVar("_T")
@@ -480,15 +485,13 @@ def _parse_chat_message_content(
if "tool_calls" in parsed_msg:
result_msg["tool_calls"] = list(parsed_msg["tool_calls"])
# Prepend reasoning_content as <think>...</think> so the model
# sees its own chain-of-thought in multi-turn conversations.
reasoning = message.get("reasoning_content") # type: ignore[arg-type]
# Pass reasoning content as a dedicated field so the chat template
# can render it natively (Qwen3: message.reasoning_content branch).
# Accept both "reasoning" (new vllm) and "reasoning_content" (ours);
# a truthy "reasoning" value takes precedence over "reasoning_content".
reasoning = (message.get("reasoning") # type: ignore[arg-type]
or message.get("reasoning_content")) # type: ignore[arg-type]
if reasoning and isinstance(reasoning, str):
existing = result_msg.get("content") or ""
result_msg["content"] = (
f"<think>{reasoning}</think>\n\n{existing}"
if existing else f"<think>{reasoning}</think>"
)
result_msg["reasoning_content"] = reasoning
elif role == "tool":
parsed_msg = _ToolParser(message)