Fix metrics (#1963)

This commit is contained in:
Yudi Xue
2024-11-08 23:21:11 -08:00
committed by GitHub
parent d1150e9a00
commit 95a4ed129a
6 changed files with 142 additions and 8 deletions

View File

@@ -213,19 +213,67 @@ class Metrics:
name="sglang:e2e_request_latency_seconds",
documentation="Histogram of End-to-end request latency in seconds",
labelnames=labelnames,
buckets=build_1_2_5_buckets(max_model_len),
buckets=[
0.3,
0.5,
0.8,
1.0,
1.5,
2.0,
2.5,
5.0,
10.0,
15.0,
20.0,
30.0,
40.0,
50.0,
60.0,
],
)
# Histogram registered under the metric name
# "sglang:waiting_request_latency_seconds"; its documentation string describes
# it as the request waiting time in seconds, labelled by `labelnames`.
# NOTE(review): this text comes from a flattened diff hunk, so two `buckets=`
# arguments appear below. Presumably the `build_1_2_5_buckets(max_model_len)`
# line is the removed one and the explicit list (0.3 through 60.0, presumably
# seconds to match the metric name) is its replacement -- confirm against the
# real file before relying on either.
self.histogram_time_waiting_requests = Histogram(
name="sglang:waiting_request_latency_seconds",
documentation="Histogram of request waiting time in seconds",
labelnames=labelnames,
buckets=build_1_2_5_buckets(max_model_len),
buckets=[
0.3,
0.5,
0.8,
1.0,
1.5,
2.0,
2.5,
5.0,
10.0,
15.0,
20.0,
30.0,
40.0,
50.0,
60.0,
],
)
# Histogram registered under the metric name
# "sglang:decode_request_latency_seconds"; its documentation string describes
# it as the request decoding time in seconds, labelled by `labelnames`.
# NOTE(review): this text comes from a flattened diff hunk, so two `buckets=`
# arguments appear below. Presumably the `build_1_2_5_buckets(max_model_len)`
# line is the removed one and the explicit list (0.3 through 60.0, presumably
# seconds to match the metric name) is its replacement -- confirm against the
# real file before relying on either.
self.histogram_time_decode_requests = Histogram(
name="sglang:decode_request_latency_seconds",
documentation="Histogram of request decoding time in seconds",
labelnames=labelnames,
buckets=build_1_2_5_buckets(max_model_len),
buckets=[
0.3,
0.5,
0.8,
1.0,
1.5,
2.0,
2.5,
5.0,
10.0,
15.0,
20.0,
30.0,
40.0,
50.0,
60.0,
],
)

View File

@@ -34,15 +34,12 @@ class Stats:
num_running_req: int = 0
num_waiting_req: int = 0
gen_throughput: float = 0.0
num_token: int = 0
token_usage: float = 0.0
waiting_queue: int = 0
time_e2e_requests: List[float] = field(default_factory=list)
time_waiting_requests: List[float] = field(default_factory=list)
time_decode_requests: List[float] = field(default_factory=list)
# system stats
token_usage: float = 0.0
is_mixed_chunk: bool = False
new_seq: int = 0
new_token: int = 0
cached_token: int = 0