Fix metrics (#1963)
This commit is contained in:
@@ -213,19 +213,67 @@ class Metrics:
|
||||
name="sglang:e2e_request_latency_seconds",
|
||||
documentation="Histogram of End-to-end request latency in seconds",
|
||||
labelnames=labelnames,
|
||||
buckets=build_1_2_5_buckets(max_model_len),
|
||||
buckets=[
|
||||
0.3,
|
||||
0.5,
|
||||
0.8,
|
||||
1.0,
|
||||
1.5,
|
||||
2.0,
|
||||
2.5,
|
||||
5.0,
|
||||
10.0,
|
||||
15.0,
|
||||
20.0,
|
||||
30.0,
|
||||
40.0,
|
||||
50.0,
|
||||
60.0,
|
||||
],
|
||||
)
|
||||
self.histogram_time_waiting_requests = Histogram(
|
||||
name="sglang:waiting_request_latency_seconds",
|
||||
documentation="Histogram of request waiting time in seconds",
|
||||
labelnames=labelnames,
|
||||
buckets=build_1_2_5_buckets(max_model_len),
|
||||
buckets=[
|
||||
0.3,
|
||||
0.5,
|
||||
0.8,
|
||||
1.0,
|
||||
1.5,
|
||||
2.0,
|
||||
2.5,
|
||||
5.0,
|
||||
10.0,
|
||||
15.0,
|
||||
20.0,
|
||||
30.0,
|
||||
40.0,
|
||||
50.0,
|
||||
60.0,
|
||||
],
|
||||
)
|
||||
self.histogram_time_decode_requests = Histogram(
|
||||
name="sglang:decode_request_latency_seconds",
|
||||
documentation="Histogram of request decoding time in seconds",
|
||||
labelnames=labelnames,
|
||||
buckets=build_1_2_5_buckets(max_model_len),
|
||||
buckets=[
|
||||
0.3,
|
||||
0.5,
|
||||
0.8,
|
||||
1.0,
|
||||
1.5,
|
||||
2.0,
|
||||
2.5,
|
||||
5.0,
|
||||
10.0,
|
||||
15.0,
|
||||
20.0,
|
||||
30.0,
|
||||
40.0,
|
||||
50.0,
|
||||
60.0,
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -34,15 +34,12 @@ class Stats:
|
||||
num_running_req: int = 0
|
||||
num_waiting_req: int = 0
|
||||
gen_throughput: float = 0.0
|
||||
num_token: int = 0
|
||||
token_usage: float = 0.0
|
||||
waiting_queue: int = 0
|
||||
time_e2e_requests: List[float] = field(default_factory=list)
|
||||
time_waiting_requests: List[float] = field(default_factory=list)
|
||||
time_decode_requests: List[float] = field(default_factory=list)
|
||||
# system stats
|
||||
token_usage: float = 0.0
|
||||
is_mixed_chunk: bool = False
|
||||
new_seq: int = 0
|
||||
new_token: int = 0
|
||||
cached_token: int = 0
|
||||
|
||||
Reference in New Issue
Block a user