Split the __init__ of scheduler into smaller functions. Improve the eagle tests (#4128)

This commit is contained in:
Lianmin Zheng
2025-03-06 00:13:20 -08:00
committed by GitHub
parent 0804dd11a0
commit fcc2e37f69
7 changed files with 279 additions and 341 deletions

View File

@@ -121,6 +121,12 @@ class TokenizerMetricsCollector:
labelnames=labels.keys(),
)
self.cached_tokens_total = Counter(
name="sglang:cached_tokens_total",
documentation="Number of cached prompt tokens.",
labelnames=labels.keys(),
)
self.num_requests_total = Counter(
name="sglang:num_requests_total",
documentation="Number of requests processed.",
@@ -245,10 +251,12 @@ class TokenizerMetricsCollector:
self,
prompt_tokens: int,
generation_tokens: int,
cached_tokens: int,
e2e_latency: float,
):
self.prompt_tokens_total.labels(**self.labels).inc(prompt_tokens)
self.generation_tokens_total.labels(**self.labels).inc(generation_tokens)
self.cached_tokens_total.labels(**self.labels).inc(cached_tokens)
self.num_requests_total.labels(**self.labels).inc(1)
self._log_histogram(self.histogram_e2e_request_latency, e2e_latency)
if generation_tokens >= 1: