Improve send_one script (#11817)
This commit is contained in:
@@ -12,6 +12,7 @@ import dataclasses
|
||||
import json
|
||||
|
||||
import requests
|
||||
import tabulate
|
||||
|
||||
from sglang.profiler import run_profile
|
||||
|
||||
@@ -141,12 +142,16 @@ def send_one_prompt(args):
|
||||
)
|
||||
|
||||
if args.stream:
|
||||
last_len = 0
|
||||
for chunk in response.iter_lines(decode_unicode=False):
|
||||
chunk = chunk.decode("utf-8")
|
||||
if chunk and chunk.startswith("data:"):
|
||||
if chunk == "data: [DONE]":
|
||||
break
|
||||
ret = json.loads(chunk[5:].strip("\n"))
|
||||
chunk_str = ret["text"][last_len:]
|
||||
last_len = len(ret["text"])
|
||||
print(chunk_str, end="", flush=True)
|
||||
else:
|
||||
ret = response.json()
|
||||
|
||||
@@ -157,21 +162,25 @@ def send_one_prompt(args):
|
||||
print(ret)
|
||||
return 0, 0
|
||||
|
||||
latency = ret["meta_info"]["e2e_latency"]
|
||||
|
||||
if "spec_verify_ct" in ret["meta_info"]:
|
||||
if "spec_verify_ct" in ret["meta_info"] and ret["meta_info"]["spec_verify_ct"] > 0:
|
||||
acc_length = (
|
||||
ret["meta_info"]["completion_tokens"] / ret["meta_info"]["spec_verify_ct"]
|
||||
)
|
||||
else:
|
||||
acc_length = 1.0
|
||||
|
||||
latency = ret["meta_info"]["e2e_latency"]
|
||||
speed = ret["meta_info"]["completion_tokens"] / latency
|
||||
tokens = ret["meta_info"]["completion_tokens"]
|
||||
|
||||
if not args.stream:
|
||||
print(ret["text"])
|
||||
|
||||
print(ret["text"])
|
||||
print()
|
||||
print(f"{acc_length=:.2f}")
|
||||
print(f"{speed=:.2f} token/s")
|
||||
headers = ["Latency (s)", "Tokens", "Acc Length", "Speed (token/s)"]
|
||||
rows = [[f"{latency:.3f}", f"{tokens}", f"{acc_length:.3f}", f"{speed:.2f}"]]
|
||||
msg = tabulate.tabulate(rows, headers=headers, tablefmt="pretty")
|
||||
print(msg)
|
||||
|
||||
return acc_length, speed
|
||||
|
||||
|
||||
Reference in New Issue
Block a user