Fix missing reasoning token counts when the finish token yields no delta message
This commit is contained in:
@@ -560,9 +560,14 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
# if the message delta is None (e.g. because it was a
|
# if the message delta is None (e.g. because it was a
|
||||||
# "control token" for tool calls or the parser otherwise
|
# "control token" for tool calls or the parser otherwise
|
||||||
# wasn't ready to send a token), then
|
# wasn't ready to send a token), then
|
||||||
# get the next token without streaming a chunk
|
# get the next token without streaming a chunk.
|
||||||
|
# However, if this is the finish token we must NOT skip —
|
||||||
|
# the finish block updates reasoning_token_counts, sets
|
||||||
|
# finish_reason_sent, and flushes the final usage chunk.
|
||||||
if delta_message is None:
|
if delta_message is None:
|
||||||
continue
|
if output.finish_reason is None:
|
||||||
|
continue
|
||||||
|
delta_message = DeltaMessage()
|
||||||
|
|
||||||
if output.finish_reason is None:
|
if output.finish_reason is None:
|
||||||
# Send token-by-token response for each request.n
|
# Send token-by-token response for each request.n
|
||||||
|
|||||||
Reference in New Issue
Block a user