diff --git a/benchmark/hicache/bench_multiturn.py b/benchmark/hicache/bench_multiturn.py index 00b217217..fe154d6b6 100644 --- a/benchmark/hicache/bench_multiturn.py +++ b/benchmark/hicache/bench_multiturn.py @@ -479,6 +479,18 @@ class WorkloadGenerator: "summary": { "total_requests": len(self.performance_metrics["ttft"]), "request_rate": self.request_rate, + "average_prompt_len": ( + sum(self.performance_metrics["prompt_len"]) + / len(self.performance_metrics["prompt_len"]) + if self.performance_metrics["prompt_len"] + else 0.0 + ), + "average_output_len": ( + sum(self.performance_metrics["generated_len"]) + / len(self.performance_metrics["generated_len"]) + if self.performance_metrics["generated_len"] + else 0.0 + ), "average_ttft": sum(self.performance_metrics["ttft"]) / len(self.performance_metrics["ttft"]), "p90_ttft": sorted(self.performance_metrics["ttft"])[ @@ -534,6 +546,12 @@ class WorkloadGenerator: print( f" Total requests: {performance_data['summary']['total_requests']} at {performance_data['summary']['request_rate']} requests per second" ) + print( + f" Average Prompt Length: {performance_data['summary']['average_prompt_len']:.2f} tokens" + ) + print( + f" Average Output Length: {performance_data['summary']['average_output_len']:.2f} tokens" + ) print(f" Average TTFT: {performance_data['summary']['average_ttft']:.2f}") print(f" P90 TTFT: {performance_data['summary']['p90_ttft']:.2f}") print(f" Median TTFT: {performance_data['summary']['median_ttft']:.2f}")