From b98cf398666614133050feb57a735c8746c65d03 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 14 Oct 2025 13:34:33 -0700 Subject: [PATCH] [Auto Sync] Update collector.py (20251014) (#11625) Co-authored-by: github-actions[bot] Co-authored-by: Byron Hsu --- python/sglang/srt/metrics/collector.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/sglang/srt/metrics/collector.py b/python/sglang/srt/metrics/collector.py index 60e0758ea..bd3204079 100644 --- a/python/sglang/srt/metrics/collector.py +++ b/python/sglang/srt/metrics/collector.py @@ -118,6 +118,7 @@ class SchedulerStats: num_running_reqs: int = 0 num_used_tokens: int = 0 token_usage: float = 0.0 + pending_prealloc_token_usage: float = 0.0 swa_token_usage: float = 0.0 gen_throughput: float = 0.0 num_queue_reqs: int = 0 @@ -177,6 +178,12 @@ class SchedulerMetricsCollector: labelnames=labels.keys(), multiprocess_mode="mostrecent", ) + self.pending_prealloc_token_usage = Gauge( + name="sglang:pending_prealloc_token_usage", + documentation="The token usage for pending preallocated tokens (not preallocated yet).", + labelnames=labels.keys(), + multiprocess_mode="mostrecent", + ) self.swa_token_usage = Gauge( name="sglang:swa_token_usage", documentation="The token usage for SWA layers.", @@ -516,6 +523,9 @@ class SchedulerMetricsCollector: self._log_gauge(self.num_running_reqs, stats.num_running_reqs) self._log_gauge(self.num_used_tokens, stats.num_used_tokens) self._log_gauge(self.token_usage, stats.token_usage) + self._log_gauge( + self.pending_prealloc_token_usage, stats.pending_prealloc_token_usage + ) self._log_gauge(self.swa_token_usage, stats.swa_token_usage) self._log_gauge(self.gen_throughput, stats.gen_throughput) self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)