From 47a4d9e72a1ea6e01edc0d49f86461ff2a8717db Mon Sep 17 00:00:00 2001 From: Lu Xinlong Date: Thu, 18 Jun 2026 12:21:05 +0800 Subject: [PATCH] fix missing reasoning token counts when the finish chunk has no delta message --- qwen3_6_scripts/serving_chat.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/qwen3_6_scripts/serving_chat.py b/qwen3_6_scripts/serving_chat.py index 2c91959..6404ad4 100644 --- a/qwen3_6_scripts/serving_chat.py +++ b/qwen3_6_scripts/serving_chat.py @@ -560,9 +560,14 @@ class OpenAIServingChat(OpenAIServing): # if the message delta is None (e.g. because it was a # "control token" for tool calls or the parser otherwise # wasn't ready to send a token, then - # get the next token without streaming a chunk + # get the next token without streaming a chunk. + # However, if this is the finish token we must NOT skip — + # the finish block updates reasoning_token_counts, sets + # finish_reason_sent, and flushes the final usage chunk. if delta_message is None: - continue + if output.finish_reason is None: + continue + delta_message = DeltaMessage() if output.finish_reason is None: # Send token-by-token response for each request.n