fix serving issues when requesting real data
This commit is contained in:
@@ -113,6 +113,11 @@ class ConversationMessage(TypedDict, total=False):
|
|||||||
tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
|
tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
|
||||||
"""The tool calls generated by the model, such as function calls."""
|
"""The tool calls generated by the model, such as function calls."""
|
||||||
|
|
||||||
|
reasoning_content: Optional[str]
|
||||||
|
"""Reasoning / thinking content for assistant messages.
|
||||||
|
Passed directly to the chat template (Qwen3 reads message.reasoning_content
|
||||||
|
natively) instead of being manually wrapped in <think>...</think>."""
|
||||||
|
|
||||||
|
|
||||||
ModalityStr = Literal["image", "audio", "video"]
|
ModalityStr = Literal["image", "audio", "video"]
|
||||||
_T = TypeVar("_T")
|
_T = TypeVar("_T")
|
||||||
@@ -480,15 +485,13 @@ def _parse_chat_message_content(
|
|||||||
if "tool_calls" in parsed_msg:
|
if "tool_calls" in parsed_msg:
|
||||||
result_msg["tool_calls"] = list(parsed_msg["tool_calls"])
|
result_msg["tool_calls"] = list(parsed_msg["tool_calls"])
|
||||||
|
|
||||||
# Prepend reasoning_content as <think>...</think> so the model
|
# Pass reasoning content as a dedicated field so the chat template
|
||||||
# sees its own chain-of-thought in multi-turn conversations.
|
# can render it natively (Qwen3: message.reasoning_content branch).
|
||||||
reasoning = message.get("reasoning_content") # type: ignore[arg-type]
|
# Accept both "reasoning" (new vllm) and "reasoning_content" (ours).
|
||||||
|
reasoning = (message.get("reasoning") # type: ignore[arg-type]
|
||||||
|
or message.get("reasoning_content")) # type: ignore[arg-type]
|
||||||
if reasoning and isinstance(reasoning, str):
|
if reasoning and isinstance(reasoning, str):
|
||||||
existing = result_msg.get("content") or ""
|
result_msg["reasoning_content"] = reasoning
|
||||||
result_msg["content"] = (
|
|
||||||
f"<think>{reasoning}</think>\n\n{existing}"
|
|
||||||
if existing else f"<think>{reasoning}</think>"
|
|
||||||
)
|
|
||||||
|
|
||||||
elif role == "tool":
|
elif role == "tool":
|
||||||
parsed_msg = _ToolParser(message)
|
parsed_msg = _ToolParser(message)
|
||||||
|
|||||||
@@ -373,6 +373,31 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@model_validator(mode="before")
@classmethod
def normalize_messages(cls, data):
    """Normalize incoming messages before pydantic union validation.

    Real-world clients (e.g. from other providers) send assistant tool_call
    messages with content=null, which fails the strict Union type check.
    Replace null (or missing) content with "" so validation passes.

    The substitution is applied to every dict message regardless of role.
    Only the ``content`` key is rewritten; every other key, including
    ``reasoning_content``, is copied through unchanged.

    Args:
        data: Raw request payload as handed to a ``mode="before"``
            validator. Normally a dict; any other object is passed through.

    Returns:
        ``data`` itself when it is not a dict or has no ``messages`` list;
        otherwise a shallow copy of ``data`` whose ``messages`` entry is a
        new list. The caller's payload and message dicts are not mutated.
    """
    # "before" validators receive the unvalidated input, which is not
    # guaranteed to be a dict; leave anything else for pydantic to reject
    # instead of failing here with AttributeError.
    if not isinstance(data, dict):
        return data
    messages = data.get("messages")
    if not isinstance(messages, list):
        return data
    # Build new dicts rather than patching in place so the caller's
    # objects stay untouched; non-dict entries are forwarded as-is.
    normalized = [
        {**msg, "content": ""}
        if isinstance(msg, dict) and msg.get("content") is None
        else msg
        for msg in messages
    ]
    return {**data, "messages": normalized}
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
def validate_stream_options(cls, data):
|
def validate_stream_options(cls, data):
|
||||||
|
|||||||
Reference in New Issue
Block a user