[PD] Improve disaggregation metrics output: update the metrics to keep reflecting real stats (#7317)
This commit is contained in:
@@ -125,6 +125,14 @@ class SchedulerMetricsMixin:
|
||||
total_queue_latency += req.queue_time_end - req.queue_time_start
|
||||
self.stats.avg_request_queue_latency = total_queue_latency / num_new_seq
|
||||
|
||||
if self.disaggregation_mode == DisaggregationMode.PREFILL:
|
||||
self.stats.num_prefill_prealloc_queue_reqs = len(
|
||||
self.disagg_prefill_bootstrap_queue.queue
|
||||
)
|
||||
self.stats.num_prefill_inflight_queue_reqs = len(
|
||||
self.disagg_prefill_inflight_queue
|
||||
)
|
||||
|
||||
self.metrics_collector.log_stats(self.stats)
|
||||
self._emit_kv_metrics()
|
||||
self._publish_kv_events()
|
||||
@@ -202,6 +210,13 @@ class SchedulerMetricsMixin:
|
||||
self.stats.spec_accept_length = spec_accept_length
|
||||
self.stats.total_retracted_reqs = self.total_retracted_reqs
|
||||
self.metrics_collector.log_stats(self.stats)
|
||||
if self.disaggregation_mode == DisaggregationMode.DECODE:
|
||||
self.stats.num_decode_prealloc_queue_reqs = len(
|
||||
self.disagg_decode_prealloc_queue.queue
|
||||
)
|
||||
self.stats.num_decode_transfer_queue_reqs = len(
|
||||
self.disagg_decode_transfer_queue.queue
|
||||
)
|
||||
self._emit_kv_metrics()
|
||||
self._publish_kv_events()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user