[PD metrics] Fix some incomplete PD-related metrics (#8627)
This commit is contained in:
@@ -1513,6 +1513,20 @@ class Scheduler(
|
||||
self.stats.gen_throughput = 0
|
||||
self.stats.num_queue_reqs = len(self.waiting_queue)
|
||||
self.stats.num_grammar_queue_reqs = len(self.grammar_queue)
|
||||
if self.disaggregation_mode == DisaggregationMode.PREFILL:
|
||||
self.stats.num_prefill_prealloc_queue_reqs = len(
|
||||
self.disagg_prefill_bootstrap_queue.queue
|
||||
)
|
||||
self.stats.num_prefill_inflight_queue_reqs = len(
|
||||
self.disagg_prefill_inflight_queue
|
||||
)
|
||||
if self.disaggregation_mode == DisaggregationMode.DECODE:
|
||||
self.stats.num_decode_prealloc_queue_reqs = len(
|
||||
self.disagg_decode_prealloc_queue.queue
|
||||
)
|
||||
self.stats.num_decode_transfer_queue_reqs = len(
|
||||
self.disagg_decode_transfer_queue.queue
|
||||
)
|
||||
self.metrics_collector.log_stats(self.stats)
|
||||
self._publish_kv_events()
|
||||
|
||||
|
||||
@@ -230,7 +230,7 @@ class SchedulerMetricsMixin:
|
||||
self.stats.num_grammar_queue_reqs = len(self.grammar_queue)
|
||||
self.stats.spec_accept_length = spec_accept_length
|
||||
self.stats.total_retracted_reqs = self.total_retracted_reqs
|
||||
self.metrics_collector.log_stats(self.stats)
|
||||
self.stats.avg_request_queue_latency = 0.0
|
||||
if self.disaggregation_mode == DisaggregationMode.DECODE:
|
||||
self.stats.num_decode_prealloc_queue_reqs = len(
|
||||
self.disagg_decode_prealloc_queue.queue
|
||||
@@ -238,6 +238,7 @@ class SchedulerMetricsMixin:
|
||||
self.stats.num_decode_transfer_queue_reqs = len(
|
||||
self.disagg_decode_transfer_queue.queue
|
||||
)
|
||||
self.metrics_collector.log_stats(self.stats)
|
||||
self._emit_kv_metrics()
|
||||
self._publish_kv_events()
|
||||
|
||||
|
||||
@@ -539,6 +539,7 @@ class SchedulerMetricsCollector:
|
||||
self.num_running_reqs_offline_batch, stats.num_running_reqs_offline_batch
|
||||
)
|
||||
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)
|
||||
self._log_gauge(self.avg_request_queue_latency, stats.avg_request_queue_latency)
|
||||
|
||||
# Speculative decoding
|
||||
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
|
||||
|
||||
Reference in New Issue
Block a user