This commit is contained in:
Ying Sheng
2024-07-05 10:06:17 -07:00
committed by GitHub
parent 5a57b8addd
commit dc1b8bcfaa
21 changed files with 487 additions and 354 deletions

View File

@@ -92,4 +92,4 @@ if __name__ == "__main__":
print(ret)
speed = args.batch_size * max_new_tokens / latency
-print(f"latency: {latency:.2f} s, speed: {speed:.2f} token/s")
+print(f"latency: {latency:.2f} s, speed: {speed:.2f} token/s")

View File

@@ -307,8 +307,9 @@ def main(args: argparse.Namespace):
avg_per_output_token_latency = np.mean(
[latency / output_len for _, output_len, latency in REQUEST_LATENCY]
)
-decoding_throughput = np.sum([
-output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time
+decoding_throughput = (
+np.sum([output_len for _, output_len, _ in REQUEST_LATENCY]) / benchmark_time
+)
print(f"Total time: {benchmark_time:.2f} s")
print(f"Request throughput: {args.num_prompts / benchmark_time:.2f} requests/s")