From 47a4d9e72a1ea6e01edc0d49f86461ff2a8717db Mon Sep 17 00:00:00 2001
From: Lu Xinlong <luxinlong02@4paradigm.com>
Date: Thu, 18 Jun 2026 12:21:05 +0800
Subject: [PATCH] Fix missing reasoning token counts when final delta is None

---
 qwen3_6_scripts/serving_chat.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/qwen3_6_scripts/serving_chat.py b/qwen3_6_scripts/serving_chat.py
index 2c91959..6404ad4 100644
--- a/qwen3_6_scripts/serving_chat.py
+++ b/qwen3_6_scripts/serving_chat.py
@@ -560,9 +560,14 @@ class OpenAIServingChat(OpenAIServing):
                     # if the message delta is None (e.g. because it was a
                     # "control token" for tool calls or the parser otherwise
                     # wasn't ready to send a token, then
-                    #   get the next token without streaming a chunk
+                    #   get the next token without streaming a chunk.
+                    # However, if this is the finish token we must NOT skip —
+                    # the finish block updates reasoning_token_counts, sets
+                    # finish_reason_sent, and flushes the final usage chunk.
                     if delta_message is None:
-                        continue
+                        if output.finish_reason is None:
+                            continue
+                        delta_message = DeltaMessage()
 
                     if output.finish_reason is None:
                         # Send token-by-token response for each request.n