From c333f12547184778e60818816d3288b0f67c21ce Mon Sep 17 00:00:00 2001
From: guobj
Date: Tue, 28 Oct 2025 02:11:36 +0000
Subject: [PATCH] =?UTF-8?q?=E8=A1=A5=E5=85=85=20bench=5Fserving.py?=
 =?UTF-8?q?=E9=87=8Ctpot=E7=AD=89=E6=8C=87=E6=A0=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Note: the Subject decodes (RFC 2047, UTF-8) to roughly "Add TPOT and other
metrics to bench_serving.py". This patch adds p95 TTFT/TPOT fields to
BenchmarkMetrics, computes them with np.percentile(..., 95) in
calculate_metrics, and prints a TPOT section plus the P95 values in benchmark.

 python/sglang/bench_serving.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py
index 5e4e2c8ef..32a6d2692 100644
--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -839,10 +839,12 @@ class BenchmarkMetrics:
     mean_ttft_ms: float
     median_ttft_ms: float
     std_ttft_ms: float
+    p95_ttft_ms: float
     p99_ttft_ms: float
     mean_tpot_ms: float
     median_tpot_ms: float
     std_tpot_ms: float
+    p95_tpot_ms: float
     p99_tpot_ms: float
     mean_itl_ms: float
     median_itl_ms: float
@@ -1665,10 +1667,12 @@ def calculate_metrics(
         * 1000,  # ttfts is empty if streaming is not supported by backend
         median_ttft_ms=np.median(ttfts or 0) * 1000,
         std_ttft_ms=np.std(ttfts or 0) * 1000,
+        p95_ttft_ms=np.percentile(ttfts or 0, 95) * 1000,
         p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
         mean_tpot_ms=np.mean(tpots or 0) * 1000,
         median_tpot_ms=np.median(tpots or 0) * 1000,
         std_tpot_ms=np.std(tpots or 0) * 1000,
+        p95_tpot_ms=np.percentile(tpots or 0, 95) * 1000,
         p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
         mean_itl_ms=np.mean(itls or 0) * 1000,
         median_itl_ms=np.median(itls or 0) * 1000,
@@ -1974,6 +1978,12 @@ async def benchmark(
     print("{:<40} {:<10.2f}".format("Mean TTFT (ms):", metrics.mean_ttft_ms))
     print("{:<40} {:<10.2f}".format("Median TTFT (ms):", metrics.median_ttft_ms))
     print("{:<40} {:<10.2f}".format("P99 TTFT (ms):", metrics.p99_ttft_ms))
+    print("{:<40} {:<10.2f}".format("P95 TTFT (ms):", metrics.p95_ttft_ms))
+    print("{s:{c}^{n}}".format(s="Time per Output Token (excl. 1st token)", n=50, c="-"))
+    print("{:<40} {:<10.2f}".format("Mean TPOT (ms):", metrics.mean_tpot_ms))
+    print("{:<40} {:<10.2f}".format("Median TPOT (ms):", metrics.median_tpot_ms))
+    print("{:<40} {:<10.2f}".format("P99 TPOT (ms):", metrics.p99_tpot_ms))
+    print("{:<40} {:<10.2f}".format("P95 TPOT (ms):", metrics.p95_tpot_ms))
     print("{s:{c}^{n}}".format(s="Inter-Token Latency", n=50, c="-"))
     print("{:<40} {:<10.2f}".format("Mean ITL (ms):", metrics.mean_itl_ms))
     print("{:<40} {:<10.2f}".format("Median ITL (ms):", metrics.median_itl_ms))