From b5c6529e175a7a0887b3ae2e544c9191f43e8ba7 Mon Sep 17 00:00:00 2001 From: SCDESPERTATE <74419971+SCDESPERTATE@users.noreply.github.com> Date: Mon, 25 Aug 2025 14:16:43 +0800 Subject: [PATCH] [PD] Improve disaggregation metrics output: update the metrics to keep reflecting real stats (#7317) --- python/sglang/srt/disaggregation/decode.py | 4 ++++ python/sglang/srt/disaggregation/prefill.py | 4 ++++ .../srt/managers/scheduler_metrics_mixin.py | 15 +++++++++++++++ python/sglang/srt/metrics/collector.py | 10 +++++----- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py index 4c761c9a6..b9ce9bbff 100644 --- a/python/sglang/srt/disaggregation/decode.py +++ b/python/sglang/srt/disaggregation/decode.py @@ -334,6 +334,8 @@ class DecodePreallocQueue: error_message, status_code=HTTPStatus.INTERNAL_SERVER_ERROR, ) + if self.scheduler.enable_metrics: + self.scheduler.metrics_collector.increment_bootstrap_failed_reqs() else: raise ValueError(f"Unexpected poll case: {poll}") @@ -595,6 +597,8 @@ class DecodeTransferQueue: # unlock the kv cache or it will have memory leak self.tree_cache.cache_finished_req(decode_req.req) indices_to_remove.add(i) + if self.scheduler.enable_metrics: + self.scheduler.metrics_collector.increment_transfer_failed_reqs() continue elif poll == KVPoll.Success: diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py index 5f5d0ebc6..063197618 100644 --- a/python/sglang/srt/disaggregation/prefill.py +++ b/python/sglang/srt/disaggregation/prefill.py @@ -238,6 +238,8 @@ class PrefillBootstrapQueue: self.scheduler.stream_output([req], req.return_logprob) indices_to_remove.add(i) failed_reqs.append(req) + if self.scheduler.enable_metrics: + self.scheduler.metrics_collector.increment_bootstrap_failed_reqs() continue # KV.WaitingForInput - init here @@ -522,6 +524,8 @@ class SchedulerDisaggregationPrefillMixin: req, error_message, status_code=HTTPStatus.INTERNAL_SERVER_ERROR ) done_reqs.append(req) + if self.enable_metrics: + self.metrics_collector.increment_transfer_failed_reqs() else: assert False, f"Unexpected polling state {poll=}" diff --git a/python/sglang/srt/managers/scheduler_metrics_mixin.py b/python/sglang/srt/managers/scheduler_metrics_mixin.py index a6497ffde..ccc61bd98 100644 --- a/python/sglang/srt/managers/scheduler_metrics_mixin.py +++ b/python/sglang/srt/managers/scheduler_metrics_mixin.py @@ -125,6 +125,14 @@ class SchedulerMetricsMixin: total_queue_latency += req.queue_time_end - req.queue_time_start self.stats.avg_request_queue_latency = total_queue_latency / num_new_seq + if self.disaggregation_mode == DisaggregationMode.PREFILL: + self.stats.num_prefill_prealloc_queue_reqs = len( + self.disagg_prefill_bootstrap_queue.queue + ) + self.stats.num_prefill_inflight_queue_reqs = len( + self.disagg_prefill_inflight_queue + ) + self.metrics_collector.log_stats(self.stats) self._emit_kv_metrics() self._publish_kv_events() @@ -202,6 +210,13 @@ class SchedulerMetricsMixin: self.stats.spec_accept_length = spec_accept_length self.stats.total_retracted_reqs = self.total_retracted_reqs self.metrics_collector.log_stats(self.stats) + if self.disaggregation_mode == DisaggregationMode.DECODE: + self.stats.num_decode_prealloc_queue_reqs = len( + self.disagg_decode_prealloc_queue.queue + ) + self.stats.num_decode_transfer_queue_reqs = len( + self.disagg_decode_transfer_queue.queue + ) self._emit_kv_metrics() self._publish_kv_events() diff --git a/python/sglang/srt/metrics/collector.py b/python/sglang/srt/metrics/collector.py index 4c32b8fc6..cfb90aa0a 100644 --- a/python/sglang/srt/metrics/collector.py +++ b/python/sglang/srt/metrics/collector.py @@ -142,7 +142,7 @@ class SchedulerStats: spec_accept_length: float = 0.0 avg_request_queue_latency: float = 0.0 num_prefill_prealloc_queue_reqs: int = 0 - num_prefill_infight_queue_reqs: int = 0 + num_prefill_inflight_queue_reqs: int = 0 num_decode_prealloc_queue_reqs: int = 0 num_decode_transfer_queue_reqs: int = 0 total_retracted_reqs: int = 0 @@ -235,9 +235,9 @@ class SchedulerMetricsCollector: multiprocess_mode="mostrecent", ) - self.num_prefill_infight_queue_reqs = Gauge( - name="sglang:num_prefill_infight_queue_reqs", - documentation="The number of requests in the prefill infight queue.", + self.num_prefill_inflight_queue_reqs = Gauge( + name="sglang:num_prefill_inflight_queue_reqs", + documentation="The number of requests in the prefill inflight queue.", labelnames=labels.keys(), multiprocess_mode="mostrecent", ) @@ -294,7 +294,7 @@ class SchedulerMetricsCollector: self.num_prefill_prealloc_queue_reqs, stats.num_prefill_prealloc_queue_reqs ) self._log_gauge( - self.num_prefill_infight_queue_reqs, stats.num_prefill_infight_queue_reqs + self.num_prefill_inflight_queue_reqs, stats.num_prefill_inflight_queue_reqs ) self._log_gauge( self.num_decode_prealloc_queue_reqs, stats.num_decode_prealloc_queue_reqs