# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from dataclasses import dataclass, field from typing import Any, TypeAlias, TypeVar from prometheus_client import Counter, Gauge, Histogram from vllm.config import KVTransferConfig, VllmConfig from vllm.distributed.kv_transfer.kv_connector.factory import KVConnectorFactory from vllm.logger import init_logger PromMetric: TypeAlias = Gauge | Counter | Histogram PromMetricT = TypeVar("PromMetricT", bound=PromMetric) logger = init_logger(__name__) @dataclass class KVConnectorStats: """ Base class for KV Connector Stats, a container for transfer performance metrics or otherwise important telemetry from the connector. All sub-classes need to be serializable as stats are sent from worker to logger process. """ data: dict[str, Any] = field(default_factory=dict) def reset(self): """Reset the stats, clear the state.""" raise NotImplementedError def aggregate(self, other: "KVConnectorStats") -> "KVConnectorStats": """ Aggregate stats with another `KVConnectorStats` object. """ raise NotImplementedError def reduce(self) -> dict[str, int | float]: """ Reduce the observations collected during a time interval to one or more representative values (eg avg/median/sum of the series). This is meant to be called by the logger to produce a summary of the stats for the last time interval. """ raise NotImplementedError def is_empty(self) -> bool: """Return True if the stats are empty.""" raise NotImplementedError class KVConnectorLogging: def __init__(self, kv_transfer_config: KVTransferConfig | None): # Instantiate the connector's stats class. if kv_transfer_config and kv_transfer_config.kv_connector: self.connector_cls = KVConnectorFactory.get_connector_class( kv_transfer_config ) self.reset() def reset(self): self.transfer_stats_accumulator: KVConnectorStats | None = None def observe(self, transfer_stats_data: dict[str, Any]): # Should not be called when a KVConnector is not configured. assert self.connector_cls is not None # Called periodically when connector syncs with the scheduler. # Note that this is not the same as the logging interval. # We expect transfer_stats_data to be aggregated across all workers and # consist of observations from a single connector or a MultiConnector. transfer_stats = self.connector_cls.build_kv_connector_stats( transfer_stats_data ) if transfer_stats is None: logger.warning_once( "The connector %s is collecting stats but " "does not implement the " "`build_kv_connector_stats` method. " "Stats will not be logged.", self.connector_cls, ) return if self.transfer_stats_accumulator is None: self.transfer_stats_accumulator = transfer_stats else: # Accumulate last interval stats. self.transfer_stats_accumulator = self.transfer_stats_accumulator.aggregate( transfer_stats ) def log(self, log_fn=logger.info): """Log transfer metrics periodically, similar to throughput logging""" if ( self.transfer_stats_accumulator and not self.transfer_stats_accumulator.is_empty() ): # Produce a single cumulative stats object for the last time # interval from the recorded observations. xfer_metrics = self.transfer_stats_accumulator.reduce() xfer_metrics_str = ", ".join(f"{k}={v}" for k, v in xfer_metrics.items()) log_fn("KV Transfer metrics: %s", xfer_metrics_str) # Reset metrics for next interval self.reset() class KVConnectorPromMetrics: """ A base class for per-connector Prometheus metric registration and recording. """ def __init__( self, vllm_config: VllmConfig, metric_types: dict[type[PromMetric], type[PromMetricT]], labelnames: list[str], per_engine_labelvalues: dict[int, list[object]], ): self._kv_transfer_config = vllm_config.kv_transfer_config self._gauge_cls = metric_types[Gauge] self._counter_cls = metric_types[Counter] self._histogram_cls = metric_types[Histogram] self._labelnames = labelnames self._per_engine_labelvalues = per_engine_labelvalues def make_per_engine(self, metric: PromMetric) -> dict[int, PromMetric]: """ Create a per-engine child of a prometheus_client.Metric with the appropriate labels set. The parent metric must be created using the labelnames list. """ return { idx: metric.labels(*labelvalues) for idx, labelvalues in self._per_engine_labelvalues.items() } def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0): """ Record the supplied transfer statistics to Prometheus metrics. These statistics are engine-specific, and should be recorded to a metric with the appropriate 'engine' label. These metric instances can be created using the make_per_engine() helper method. """ raise NotImplementedError class KVConnectorPrometheus: """ Support for registering per-connector Prometheus metrics, and recording transfer statistics to those metrics. Uses KVConnectorBase.build_prom_metrics(). """ _gauge_cls = Gauge _counter_cls = Counter _histogram_cls = Histogram def __init__( self, vllm_config: VllmConfig, labelnames: list[str], per_engine_labelvalues: dict[int, list[object]], ): self.prom_metrics: KVConnectorPromMetrics | None = None kv_transfer_config = vllm_config.kv_transfer_config if kv_transfer_config and kv_transfer_config.kv_connector: connector_cls = KVConnectorFactory.get_connector_class(kv_transfer_config) metric_types = { Gauge: self._gauge_cls, Counter: self._counter_cls, Histogram: self._histogram_cls, } self.prom_metrics = connector_cls.build_prom_metrics( vllm_config, metric_types, labelnames, per_engine_labelvalues, ) def observe(self, transfer_stats_data: dict[str, Any], engine_idx: int = 0): if self.prom_metrics is None: return self.prom_metrics.observe(transfer_stats_data, engine_idx)