From 4014804157157b72f53d83a3add4a5e9d984652d Mon Sep 17 00:00:00 2001 From: Wen Sun <35923278+HermitSun@users.noreply.github.com> Date: Thu, 13 Mar 2025 13:12:55 +0800 Subject: [PATCH] =?UTF-8?q?Ensure=20Usage=20Data=20in=20Streaming=20Respon?= =?UTF-8?q?ses=20Aligns=20with=20vLLM=E2=80=99s=20Implementation=20(#3814)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/sglang/srt/openai_api/adapter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py index 7d3076a07..e6a7d9984 100644 --- a/python/sglang/srt/openai_api/adapter.py +++ b/python/sglang/srt/openai_api/adapter.py @@ -820,13 +820,13 @@ async def v1_completions(tokenizer_manager, raw_request: Request): ) final_usage_chunk = CompletionStreamResponse( - id=str(uuid.uuid4().hex), + id=content["meta_info"]["id"], choices=[], model=request.model, usage=usage, ) final_usage_data = final_usage_chunk.model_dump_json( - exclude_unset=True, exclude_none=True + exclude_none=True ) yield f"data: {final_usage_data}\n\n" except ValueError as e: @@ -1495,13 +1495,13 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request): ) final_usage_chunk = ChatCompletionStreamResponse( - id=str(uuid.uuid4().hex), + id=content["meta_info"]["id"], choices=[], model=request.model, usage=usage, ) final_usage_data = final_usage_chunk.model_dump_json( - exclude_unset=True, exclude_none=True + exclude_none=True ) yield f"data: {final_usage_data}\n\n" except ValueError as e: