补充 bench_serving.py 里的 TPOT 等指标
This commit is contained in:
@@ -839,10 +839,12 @@ class BenchmarkMetrics:
|
|||||||
mean_ttft_ms: float
|
mean_ttft_ms: float
|
||||||
median_ttft_ms: float
|
median_ttft_ms: float
|
||||||
std_ttft_ms: float
|
std_ttft_ms: float
|
||||||
|
p95_ttft_ms: float
|
||||||
p99_ttft_ms: float
|
p99_ttft_ms: float
|
||||||
mean_tpot_ms: float
|
mean_tpot_ms: float
|
||||||
median_tpot_ms: float
|
median_tpot_ms: float
|
||||||
std_tpot_ms: float
|
std_tpot_ms: float
|
||||||
|
p95_tpot_ms: float
|
||||||
p99_tpot_ms: float
|
p99_tpot_ms: float
|
||||||
mean_itl_ms: float
|
mean_itl_ms: float
|
||||||
median_itl_ms: float
|
median_itl_ms: float
|
||||||
@@ -1665,10 +1667,12 @@ def calculate_metrics(
|
|||||||
* 1000, # ttfts is empty if streaming is not supported by backend
|
* 1000, # ttfts is empty if streaming is not supported by backend
|
||||||
median_ttft_ms=np.median(ttfts or 0) * 1000,
|
median_ttft_ms=np.median(ttfts or 0) * 1000,
|
||||||
std_ttft_ms=np.std(ttfts or 0) * 1000,
|
std_ttft_ms=np.std(ttfts or 0) * 1000,
|
||||||
|
p95_ttft_ms=np.percentile(ttfts or 0, 95) * 1000,
|
||||||
p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
|
p99_ttft_ms=np.percentile(ttfts or 0, 99) * 1000,
|
||||||
mean_tpot_ms=np.mean(tpots or 0) * 1000,
|
mean_tpot_ms=np.mean(tpots or 0) * 1000,
|
||||||
median_tpot_ms=np.median(tpots or 0) * 1000,
|
median_tpot_ms=np.median(tpots or 0) * 1000,
|
||||||
std_tpot_ms=np.std(tpots or 0) * 1000,
|
std_tpot_ms=np.std(tpots or 0) * 1000,
|
||||||
|
p95_tpot_ms=np.percentile(tpots or 0, 95) * 1000,
|
||||||
p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
|
p99_tpot_ms=np.percentile(tpots or 0, 99) * 1000,
|
||||||
mean_itl_ms=np.mean(itls or 0) * 1000,
|
mean_itl_ms=np.mean(itls or 0) * 1000,
|
||||||
median_itl_ms=np.median(itls or 0) * 1000,
|
median_itl_ms=np.median(itls or 0) * 1000,
|
||||||
@@ -1974,6 +1978,12 @@ async def benchmark(
|
|||||||
print("{:<40} {:<10.2f}".format("Mean TTFT (ms):", metrics.mean_ttft_ms))
|
print("{:<40} {:<10.2f}".format("Mean TTFT (ms):", metrics.mean_ttft_ms))
|
||||||
print("{:<40} {:<10.2f}".format("Median TTFT (ms):", metrics.median_ttft_ms))
|
print("{:<40} {:<10.2f}".format("Median TTFT (ms):", metrics.median_ttft_ms))
|
||||||
print("{:<40} {:<10.2f}".format("P99 TTFT (ms):", metrics.p99_ttft_ms))
|
print("{:<40} {:<10.2f}".format("P99 TTFT (ms):", metrics.p99_ttft_ms))
|
||||||
|
print("{:<40} {:<10.2f}".format("P95 TTFT (ms):", metrics.p95_ttft_ms))
|
||||||
|
print("{s:{c}^{n}}".format(s="Time per Output Token (excl. 1st token)", n=50, c="-"))
|
||||||
|
print("{:<40} {:<10.2f}".format("Mean TPOT (ms):", metrics.mean_tpot_ms))
|
||||||
|
print("{:<40} {:<10.2f}".format("Median TPOT (ms):", metrics.median_tpot_ms))
|
||||||
|
print("{:<40} {:<10.2f}".format("P99 TPOT (ms):", metrics.p99_tpot_ms))
|
||||||
|
print("{:<40} {:<10.2f}".format("P95 TPOT (ms):", metrics.p95_tpot_ms))
|
||||||
print("{s:{c}^{n}}".format(s="Inter-Token Latency", n=50, c="-"))
|
print("{s:{c}^{n}}".format(s="Inter-Token Latency", n=50, c="-"))
|
||||||
print("{:<40} {:<10.2f}".format("Mean ITL (ms):", metrics.mean_itl_ms))
|
print("{:<40} {:<10.2f}".format("Mean ITL (ms):", metrics.mean_itl_ms))
|
||||||
print("{:<40} {:<10.2f}".format("Median ITL (ms):", metrics.median_itl_ms))
|
print("{:<40} {:<10.2f}".format("Median ITL (ms):", metrics.median_itl_ms))
|
||||||
|
|||||||
Reference in New Issue
Block a user