[PD] Improve disaggregation metrics output: update the metrics to keep reflecting real stats (#7317)

This commit is contained in:
SCDESPERTATE
2025-08-25 14:16:43 +08:00
committed by GitHub
parent ca4b86c564
commit b5c6529e17
4 changed files with 28 additions and 5 deletions

View File

@@ -125,6 +125,14 @@ class SchedulerMetricsMixin:
total_queue_latency += req.queue_time_end - req.queue_time_start
self.stats.avg_request_queue_latency = total_queue_latency / num_new_seq
if self.disaggregation_mode == DisaggregationMode.PREFILL:
self.stats.num_prefill_prealloc_queue_reqs = len(
self.disagg_prefill_bootstrap_queue.queue
)
self.stats.num_prefill_inflight_queue_reqs = len(
self.disagg_prefill_inflight_queue
)
self.metrics_collector.log_stats(self.stats)
self._emit_kv_metrics()
self._publish_kv_events()
@@ -202,6 +210,13 @@ class SchedulerMetricsMixin:
self.stats.spec_accept_length = spec_accept_length
self.stats.total_retracted_reqs = self.total_retracted_reqs
self.metrics_collector.log_stats(self.stats)
if self.disaggregation_mode == DisaggregationMode.DECODE:
self.stats.num_decode_prealloc_queue_reqs = len(
self.disagg_decode_prealloc_queue.queue
)
self.stats.num_decode_transfer_queue_reqs = len(
self.disagg_decode_transfer_queue.queue
)
self._emit_kv_metrics()
self._publish_kv_events()