[Auto Sync] Update collector.py (20251014) (#11625)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-10-14 13:34:33 -07:00
committed by GitHub
parent 27d710457c
commit b98cf39866

View File

@@ -118,6 +118,7 @@ class SchedulerStats:
num_running_reqs: int = 0
num_used_tokens: int = 0
token_usage: float = 0.0
pending_prealloc_token_usage: float = 0.0
swa_token_usage: float = 0.0
gen_throughput: float = 0.0
num_queue_reqs: int = 0
@@ -177,6 +178,12 @@ class SchedulerMetricsCollector:
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
self.pending_prealloc_token_usage = Gauge(
name="sglang:pending_prealloc_token_usage",
documentation="The token usage for pending preallocated tokens (not preallocated yet).",
labelnames=labels.keys(),
multiprocess_mode="mostrecent",
)
self.swa_token_usage = Gauge(
name="sglang:swa_token_usage",
documentation="The token usage for SWA layers.",
@@ -516,6 +523,9 @@ class SchedulerMetricsCollector:
self._log_gauge(self.num_running_reqs, stats.num_running_reqs)
self._log_gauge(self.num_used_tokens, stats.num_used_tokens)
self._log_gauge(self.token_usage, stats.token_usage)
self._log_gauge(
self.pending_prealloc_token_usage, stats.pending_prealloc_token_usage
)
self._log_gauge(self.swa_token_usage, stats.swa_token_usage)
self._log_gauge(self.gen_throughput, stats.gen_throughput)
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)