diff --git a/benchmark/latency_throughput/bench_one.py b/benchmark/latency_throughput/bench_one.py index 36ae8a436..cfd96b54c 100644 --- a/benchmark/latency_throughput/bench_one.py +++ b/benchmark/latency_throughput/bench_one.py @@ -97,7 +97,10 @@ def run_one_batch_size(bs): print(ret) output_throughput = bs * max_new_tokens / latency - print(f"latency: {latency:.2f} s, speed: {output_throughput:.2f} token/s") + overall_throughput = bs * (args.input_len + max_new_tokens) / latency + print(f"latency: {latency:.2f} s") + print(f"decode throughput: {output_throughput:.2f} token/s") + print(f"overall throughput: {overall_throughput:.2f} token/s") with open("results.jsonl", "a") as fout: res = { @@ -107,6 +110,7 @@ def run_one_batch_size(bs): "batch_size": bs, "latency": latency, "output_throughput": output_throughput, + "overall_throughput": overall_throughput, } fout.write(json.dumps(res) + "\n")