[metrics] Add in queue metrics (#4444)

This commit is contained in:
Stefan He
2025-04-09 17:19:27 -07:00
committed by GitHub
parent 4cb53ecd0c
commit 5db37c8626
3 changed files with 30 additions and 1 deletions

View File

@@ -27,6 +27,7 @@ class SchedulerStats:
num_queue_reqs: int = 0
cache_hit_rate: float = 0.0
spec_accept_length: float = 0.0
avg_request_queue_latency: float = 0.0
class SchedulerMetricsCollector:
@@ -87,6 +88,13 @@ class SchedulerMetricsCollector:
multiprocess_mode="mostrecent",
)
self.avg_request_queue_latency = Gauge(
name="sglang:avg_request_queue_latency",
documentation="The average request queue latency for the last batch of requests in seconds.",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
def _log_gauge(self, gauge, data: Union[int, float]) -> None:
# Convenience function for logging to gauge: select the gauge child that
# matches this collector's label values (self.labels) and set it to `data`.
gauge.labels(**self.labels).set(data)
@@ -99,6 +107,7 @@ class SchedulerMetricsCollector:
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
self._log_gauge(self.avg_request_queue_latency, stats.avg_request_queue_latency)
self.last_log_time = time.time()