diff --git a/qwen3_6_scripts/serving_chat.py b/qwen3_6_scripts/serving_chat.py index 2c91959..6404ad4 100644 --- a/qwen3_6_scripts/serving_chat.py +++ b/qwen3_6_scripts/serving_chat.py @@ -560,9 +560,14 @@ class OpenAIServingChat(OpenAIServing): # if the message delta is None (e.g. because it was a # "control token" for tool calls or the parser otherwise # wasn't ready to send a token, then - # get the next token without streaming a chunk + # get the next token without streaming a chunk. + # However, if this is the finish token we must NOT skip — + # the finish block updates reasoning_token_counts, sets + # finish_reason_sent, and flushes the final usage chunk. if delta_message is None: - continue + if output.finish_reason is None: + continue + delta_message = DeltaMessage() if output.finish_reason is None: # Send token-by-token response for each request.n