[Auto Sync] Update collector.py (20251014) (#11625)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
@@ -118,6 +118,7 @@ class SchedulerStats:
|
|||||||
num_running_reqs: int = 0
|
num_running_reqs: int = 0
|
||||||
num_used_tokens: int = 0
|
num_used_tokens: int = 0
|
||||||
token_usage: float = 0.0
|
token_usage: float = 0.0
|
||||||
|
pending_prealloc_token_usage: float = 0.0
|
||||||
swa_token_usage: float = 0.0
|
swa_token_usage: float = 0.0
|
||||||
gen_throughput: float = 0.0
|
gen_throughput: float = 0.0
|
||||||
num_queue_reqs: int = 0
|
num_queue_reqs: int = 0
|
||||||
@@ -177,6 +178,12 @@ class SchedulerMetricsCollector:
|
|||||||
labelnames=labels.keys(),
|
labelnames=labels.keys(),
|
||||||
multiprocess_mode="mostrecent",
|
multiprocess_mode="mostrecent",
|
||||||
)
|
)
|
||||||
|
self.pending_prealloc_token_usage = Gauge(
|
||||||
|
name="sglang:pending_prealloc_token_usage",
|
||||||
|
documentation="The token usage for pending preallocated tokens (not preallocated yet).",
|
||||||
|
labelnames=labels.keys(),
|
||||||
|
multiprocess_mode="mostrecent",
|
||||||
|
)
|
||||||
self.swa_token_usage = Gauge(
|
self.swa_token_usage = Gauge(
|
||||||
name="sglang:swa_token_usage",
|
name="sglang:swa_token_usage",
|
||||||
documentation="The token usage for SWA layers.",
|
documentation="The token usage for SWA layers.",
|
||||||
@@ -516,6 +523,9 @@ class SchedulerMetricsCollector:
|
|||||||
self._log_gauge(self.num_running_reqs, stats.num_running_reqs)
|
self._log_gauge(self.num_running_reqs, stats.num_running_reqs)
|
||||||
self._log_gauge(self.num_used_tokens, stats.num_used_tokens)
|
self._log_gauge(self.num_used_tokens, stats.num_used_tokens)
|
||||||
self._log_gauge(self.token_usage, stats.token_usage)
|
self._log_gauge(self.token_usage, stats.token_usage)
|
||||||
|
self._log_gauge(
|
||||||
|
self.pending_prealloc_token_usage, stats.pending_prealloc_token_usage
|
||||||
|
)
|
||||||
self._log_gauge(self.swa_token_usage, stats.swa_token_usage)
|
self._log_gauge(self.swa_token_usage, stats.swa_token_usage)
|
||||||
self._log_gauge(self.gen_throughput, stats.gen_throughput)
|
self._log_gauge(self.gen_throughput, stats.gen_throughput)
|
||||||
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)
|
self._log_gauge(self.num_queue_reqs, stats.num_queue_reqs)
|
||||||
|
|||||||
Reference in New Issue
Block a user