diff --git a/qwen3_6_scripts/chat_utils.py b/qwen3_6_scripts/chat_utils.py
index 76d92fb..37d9962 100644
--- a/qwen3_6_scripts/chat_utils.py
+++ b/qwen3_6_scripts/chat_utils.py
@@ -113,6 +113,11 @@ class ConversationMessage(TypedDict, total=False):
tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
"""The tool calls generated by the model, such as function calls."""
+ reasoning_content: Optional[str]
+ """Reasoning / thinking content for assistant messages.
+ Passed directly to the chat template (Qwen3 reads message.reasoning_content
+ natively) instead of being manually wrapped in think tags."""
+
ModalityStr = Literal["image", "audio", "video"]
_T = TypeVar("_T")
@@ -480,15 +485,13 @@ def _parse_chat_message_content(
if "tool_calls" in parsed_msg:
result_msg["tool_calls"] = list(parsed_msg["tool_calls"])
- # Prepend reasoning_content as ... so the model
- # sees its own chain-of-thought in multi-turn conversations.
- reasoning = message.get("reasoning_content") # type: ignore[arg-type]
+ # Pass reasoning content as a dedicated field so the chat template
+ # can render it natively (Qwen3: message.reasoning_content branch).
+ # Accept both "reasoning" (new vllm) and "reasoning_content" (ours).
+ reasoning = (message.get("reasoning") # type: ignore[arg-type]
+ or message.get("reasoning_content")) # type: ignore[arg-type]
if reasoning and isinstance(reasoning, str):
- existing = result_msg.get("content") or ""
- result_msg["content"] = (
- f"{reasoning}\n\n{existing}"
- if existing else f"{reasoning}"
- )
+ result_msg["reasoning_content"] = reasoning
elif role == "tool":
parsed_msg = _ToolParser(message)
diff --git a/qwen3_6_scripts/protocol.py b/qwen3_6_scripts/protocol.py
index 17e495b..3a6a557 100644
--- a/qwen3_6_scripts/protocol.py
+++ b/qwen3_6_scripts/protocol.py
@@ -373,6 +373,31 @@ class ChatCompletionRequest(OpenAIBaseModel):
return None
+    @model_validator(mode="before")
+    @classmethod
+    def normalize_messages(cls, data):
+        """Normalize incoming messages before pydantic union validation.
+
+        Real-world clients (e.g. from other providers) send assistant
+        tool_call messages with content=null, which fails the strict Union
+        type check, so a null or missing content is replaced with "".
+        reasoning_content is intentionally left untouched: chat_utils.py
+        forwards it to the chat template as a dedicated field.
+        """
+        # "before" validators receive the raw input, which may not be a dict.
+        if not isinstance(data, dict):
+            return data
+        messages = data.get("messages")
+        if not isinstance(messages, list):
+            return data
+        # Build copies instead of mutating the caller's dicts in place.
+        normalized = [
+            {**msg, "content": ""}
+            if isinstance(msg, dict) and msg.get("content") is None else msg
+            for msg in messages
+        ]
+        return {**data, "messages": normalized}
+
@model_validator(mode="before")
@classmethod
def validate_stream_options(cls, data):