fix: update truss bench_serving (#5683)

This commit is contained in:
Yineng Zhang
2025-04-23 13:28:35 -07:00
committed by GitHub
parent 7c99103f4c
commit b1f6d89b5f

View File

@@ -295,7 +295,7 @@ async def async_request_truss(
# NOTE: Some completion APIs may send a final
# usage-summary response that carries no token, so
# we check that a token was actually generated
if data["choices"][0]["delta"]["content"]:
if data["choices"][0]["text"]:
timestamp = time.perf_counter()
# First token
if ttft == 0.0:
@@ -307,7 +307,7 @@ async def async_request_truss(
output.itl.append(timestamp - most_recent_timestamp)
most_recent_timestamp = timestamp
generated_text += data["choices"][0]["delta"]["content"]
generated_text += data["choices"][0]["text"]
output.generated_text = generated_text
output.success = True