Update benchmark script (#621)
This commit is contained in:
@@ -97,7 +97,10 @@ def run_one_batch_size(bs):
|
|||||||
print(ret)
|
print(ret)
|
||||||
|
|
||||||
output_throughput = bs * max_new_tokens / latency
|
output_throughput = bs * max_new_tokens / latency
|
||||||
print(f"latency: {latency:.2f} s, speed: {output_throughput:.2f} token/s")
|
overall_throughput = bs * (args.input_len + max_new_tokens) / latency
|
||||||
|
print(f"latency: {latency:.2f} s")
|
||||||
|
print(f"decode throughput: {output_throughput:.2f} token/s")
|
||||||
|
print(f"overall throughput: {overall_throughput:.2f} token/s")
|
||||||
|
|
||||||
with open("results.jsonl", "a") as fout:
|
with open("results.jsonl", "a") as fout:
|
||||||
res = {
|
res = {
|
||||||
@@ -107,6 +110,7 @@ def run_one_batch_size(bs):
|
|||||||
"batch_size": bs,
|
"batch_size": bs,
|
||||||
"latency": latency,
|
"latency": latency,
|
||||||
"output_throughput": output_throughput,
|
"output_throughput": output_throughput,
|
||||||
|
"overall_throughput": overall_throughput,
|
||||||
}
|
}
|
||||||
fout.write(json.dumps(res) + "\n")
|
fout.write(json.dumps(res) + "\n")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user