Collect more metrics: num_requests_total (#2859)

This commit is contained in:
Lianmin Zheng
2025-01-13 03:57:39 -08:00
committed by GitHub
parent 67008f4b32
commit 51ab3ccf47
3 changed files with 49 additions and 45 deletions

View File

@@ -109,6 +109,12 @@ class TokenizerMetricsCollector:
labelnames=labels.keys(),
)
self.num_requests_total = Counter(
name="sglang:num_requests_total",
documentation="Number of requests processed.",
labelnames=labels.keys(),
)
self.histogram_time_to_first_token = Histogram(
name="sglang:time_to_first_token_seconds",
documentation="Histogram of time to first token in seconds.",
@@ -185,11 +191,10 @@ class TokenizerMetricsCollector:
# Convenience function for logging to counter.
counter.labels(**self.labels).inc(data)
# Add `value` to the prompt-token counter (`self.prompt_tokens_total`).
# Delegates to `self._log_counter`, passing the counter object and the amount.
# NOTE(review): this excerpt is a diff whose +/- markers were lost; this method
# appears to be superseded by `observe_one_finished_request`, which increments
# the same counter directly -- confirm against the repository before calling it.
def inc_prompt_tokens(self, value: int):
self._log_counter(self.prompt_tokens_total, value)
# Add `value` to the generation-token counter (`self.generation_tokens_total`).
# Delegates to `self._log_counter`, passing the counter object and the amount.
# NOTE(review): this excerpt is a diff whose +/- markers were lost; this method
# appears to be superseded by `observe_one_finished_request`, which increments
# the same counter directly -- confirm against the repository before calling it.
def inc_generation_tokens(self, value: int):
self._log_counter(self.generation_tokens_total, value)
# Record the metrics for a single finished request in one call.
#
# Parameters:
#   prompt_tokens:     amount added to `self.prompt_tokens_total`.
#   generation_tokens: amount added to `self.generation_tokens_total`.
#
# Each counter is selected with `self.labels` (expanded as keyword arguments to
# `.labels(...)`) and then incremented with `.inc(...)`.
def observe_one_finished_request(self, prompt_tokens: int, generation_tokens: int):
self.prompt_tokens_total.labels(**self.labels).inc(prompt_tokens)
self.generation_tokens_total.labels(**self.labels).inc(generation_tokens)
# Every call counts as exactly one request, so the increment is a fixed 1.
self.num_requests_total.labels(**self.labels).inc(1)
# Record one time-to-first-token observation in
# `self.histogram_time_to_first_token`.
# `value` is in seconds, per the metric's name
# ("sglang:time_to_first_token_seconds") and its documentation string.
# Delegates to `self._log_histogram`, which is defined outside this excerpt.
def observe_time_to_first_token(self, value: Union[float, int]):
self._log_histogram(self.histogram_time_to_first_token, value)