Improve send_one script (#11817)

This commit is contained in:
Liangsheng Yin
2025-10-19 11:28:16 +08:00
committed by GitHub
parent 6d6ea5af0c
commit b288f4f440

View File

@@ -12,6 +12,7 @@ import dataclasses
import json
import requests
import tabulate
from sglang.profiler import run_profile
@@ -141,12 +142,16 @@ def send_one_prompt(args):
)
if args.stream:
last_len = 0
for chunk in response.iter_lines(decode_unicode=False):
chunk = chunk.decode("utf-8")
if chunk and chunk.startswith("data:"):
if chunk == "data: [DONE]":
break
ret = json.loads(chunk[5:].strip("\n"))
chunk_str = ret["text"][last_len:]
last_len = len(ret["text"])
print(chunk_str, end="", flush=True)
else:
ret = response.json()
@@ -157,21 +162,25 @@ def send_one_prompt(args):
print(ret)
return 0, 0
latency = ret["meta_info"]["e2e_latency"]
if "spec_verify_ct" in ret["meta_info"]:
if "spec_verify_ct" in ret["meta_info"] and ret["meta_info"]["spec_verify_ct"] > 0:
acc_length = (
ret["meta_info"]["completion_tokens"] / ret["meta_info"]["spec_verify_ct"]
)
else:
acc_length = 1.0
latency = ret["meta_info"]["e2e_latency"]
speed = ret["meta_info"]["completion_tokens"] / latency
tokens = ret["meta_info"]["completion_tokens"]
if not args.stream:
print(ret["text"])
print(ret["text"])
print()
print(f"{acc_length=:.2f}")
print(f"{speed=:.2f} token/s")
headers = ["Latency (s)", "Tokens", "Acc Length", "Speed (token/s)"]
rows = [[f"{latency:.3f}", f"{tokens}", f"{acc_length:.3f}", f"{speed:.2f}"]]
msg = tabulate.tabulate(rows, headers=headers, tablefmt="pretty")
print(msg)
return acc_length, speed