From bae9541e4c727ac8d7e082a2b8fd4e028e58b606 Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sun, 14 Jul 2024 14:38:13 -0700 Subject: [PATCH] Update benchmark script (#621) --- benchmark/latency_throughput/bench_one.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/benchmark/latency_throughput/bench_one.py b/benchmark/latency_throughput/bench_one.py index 36ae8a436..cfd96b54c 100644 --- a/benchmark/latency_throughput/bench_one.py +++ b/benchmark/latency_throughput/bench_one.py @@ -97,7 +97,10 @@ def run_one_batch_size(bs): print(ret) output_throughput = bs * max_new_tokens / latency - print(f"latency: {latency:.2f} s, speed: {output_throughput:.2f} token/s") + overall_throughput = bs * (args.input_len + max_new_tokens) / latency + print(f"latency: {latency:.2f} s") + print(f"decode throughput: {output_throughput:.2f} token/s") + print(f"overall throughput: {overall_throughput:.2f} token/s") with open("results.jsonl", "a") as fout: res = { @@ -107,6 +110,7 @@ def run_one_batch_size(bs): "batch_size": bs, "latency": latency, "output_throughput": output_throughput, + "overall_throughput": overall_throughput, } fout.write(json.dumps(res) + "\n")