Sync from v0.13
This commit is contained in:
257
vllm/v1/metrics/reader.py
Normal file
257
vllm/v1/metrics/reader.py
Normal file
@@ -0,0 +1,257 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from prometheus_client import REGISTRY
|
||||
from prometheus_client import Metric as PromMetric
|
||||
from prometheus_client.samples import Sample
|
||||
|
||||
|
||||
@dataclass
|
||||
class Metric:
|
||||
"""A base class for prometheus metrics.
|
||||
|
||||
Each metric may be associated with key=value labels, and
|
||||
in some cases a single vLLM instance may have multiple
|
||||
metrics with the same name but different sets of labels.
|
||||
"""
|
||||
|
||||
name: str
|
||||
labels: dict[str, str]
|
||||
|
||||
|
||||
@dataclass
|
||||
class Counter(Metric):
|
||||
"""A monotonically increasing integer counter."""
|
||||
|
||||
value: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class Vector(Metric):
|
||||
"""An ordered array of integer counters.
|
||||
|
||||
This type - which doesn't exist in Prometheus - models one very
|
||||
specific metric, vllm:spec_decode_num_accepted_tokens_per_pos.
|
||||
"""
|
||||
|
||||
values: list[int]
|
||||
|
||||
|
||||
@dataclass
|
||||
class Gauge(Metric):
|
||||
"""A numerical value that can go up or down."""
|
||||
|
||||
value: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class Histogram(Metric):
|
||||
"""Observations recorded in configurable buckets.
|
||||
|
||||
Buckets are represented by a dictionary. The key is
|
||||
the upper limit of the bucket, and the value is the
|
||||
observed count in that bucket. A '+Inf' key always
|
||||
exists.
|
||||
|
||||
The count property is the total count across all
|
||||
buckets, identical to the count of the '+Inf' bucket.
|
||||
|
||||
The sum property is the total sum of all observed
|
||||
values.
|
||||
"""
|
||||
|
||||
count: int
|
||||
sum: float
|
||||
buckets: dict[str, int]
|
||||
|
||||
|
||||
def get_metrics_snapshot() -> list[Metric]:
|
||||
"""An API for accessing in-memory Prometheus metrics.
|
||||
|
||||
Example:
|
||||
>>> for metric in llm.get_metrics():
|
||||
... if isinstance(metric, Counter):
|
||||
... print(f"{metric} = {metric.value}")
|
||||
... elif isinstance(metric, Gauge):
|
||||
... print(f"{metric} = {metric.value}")
|
||||
... elif isinstance(metric, Histogram):
|
||||
... print(f"{metric}")
|
||||
... print(f" sum = {metric.sum}")
|
||||
... print(f" count = {metric.count}")
|
||||
... for bucket_le, value in metrics.buckets.items():
|
||||
... print(f" {bucket_le} = {value}")
|
||||
"""
|
||||
collected: list[Metric] = []
|
||||
for metric in REGISTRY.collect():
|
||||
if not metric.name.startswith("vllm:"):
|
||||
continue
|
||||
if metric.type == "gauge":
|
||||
samples = _get_samples(metric)
|
||||
for s in samples:
|
||||
collected.append(
|
||||
Gauge(name=metric.name, labels=s.labels, value=s.value)
|
||||
)
|
||||
elif metric.type == "counter":
|
||||
samples = _get_samples(metric, "_total")
|
||||
if metric.name == "vllm:spec_decode_num_accepted_tokens_per_pos":
|
||||
#
|
||||
# Ugly vllm:num_accepted_tokens_per_pos special case.
|
||||
#
|
||||
# This metric is a vector of counters - for each spec
|
||||
# decoding token position, we observe the number of
|
||||
# accepted tokens using a Counter labeled with 'position'.
|
||||
# We convert these into a vector of integer values.
|
||||
#
|
||||
for labels, values in _digest_num_accepted_by_pos_samples(samples):
|
||||
collected.append(
|
||||
Vector(name=metric.name, labels=labels, values=values)
|
||||
)
|
||||
else:
|
||||
for s in samples:
|
||||
collected.append(
|
||||
Counter(name=metric.name, labels=s.labels, value=int(s.value))
|
||||
)
|
||||
|
||||
elif metric.type == "histogram":
|
||||
#
|
||||
# A histogram has a number of '_bucket' samples where
|
||||
# the 'le' label represents the upper limit of the bucket.
|
||||
# We convert these bucketized values into a dict of values
|
||||
# indexed by the value of the 'le' label. The 'le=+Inf'
|
||||
# label is a special case, catching all values observed.
|
||||
#
|
||||
bucket_samples = _get_samples(metric, "_bucket")
|
||||
count_samples = _get_samples(metric, "_count")
|
||||
sum_samples = _get_samples(metric, "_sum")
|
||||
for labels, buckets, count_value, sum_value in _digest_histogram(
|
||||
bucket_samples, count_samples, sum_samples
|
||||
):
|
||||
collected.append(
|
||||
Histogram(
|
||||
name=metric.name,
|
||||
labels=labels,
|
||||
buckets=buckets,
|
||||
count=count_value,
|
||||
sum=sum_value,
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise AssertionError(f"Unknown metric type {metric.type}")
|
||||
|
||||
return collected
|
||||
|
||||
|
||||
def _get_samples(metric: PromMetric, suffix: str | None = None) -> list[Sample]:
|
||||
name = (metric.name + suffix) if suffix is not None else metric.name
|
||||
return [s for s in metric.samples if s.name == name]
|
||||
|
||||
|
||||
def _strip_label(labels: dict[str, str], key_to_remove: str) -> dict[str, str]:
|
||||
labels_copy = labels.copy()
|
||||
labels_copy.pop(key_to_remove)
|
||||
return labels_copy
|
||||
|
||||
|
||||
def _digest_histogram(
|
||||
bucket_samples: list[Sample], count_samples: list[Sample], sum_samples: list[Sample]
|
||||
) -> list[tuple[dict[str, str], dict[str, int], int, float]]:
|
||||
#
|
||||
# In the case of DP, we have an indigestable
|
||||
# per-bucket-per-engine count as a list of labelled
|
||||
# samples, along with total and sum samples
|
||||
#
|
||||
# bucket_samples (in):
|
||||
# labels = {bucket: 100, idx: 0}, value = 2
|
||||
# labels = {bucket: 200, idx: 0}, value = 4
|
||||
# labels = {bucket: Inf, idx: 0}, value = 10
|
||||
# labels = {bucket: 100, idx: 1}, value = 1
|
||||
# labels = {bucket: 200, idx: 2}, value = 5
|
||||
# labels = {bucket: Inf, idx: 3}, value = 7
|
||||
# count_samples (in):
|
||||
# labels = {idx: 0}, value = 10
|
||||
# labels = {idx: 1}, value = 7
|
||||
# sum_samples (in):
|
||||
# labels = {idx: 0}, value = 2000
|
||||
# labels = {idx: 1}, value = 1200
|
||||
#
|
||||
# output: [
|
||||
# {idx: 0}, {"100": 2, "200": 4, "Inf": 10}, 10, 2000
|
||||
# {idx: 1}, {"100": 1, "200": 5, "Inf": 7}, 7, 1200
|
||||
# ]
|
||||
buckets_by_labels: dict[frozenset[tuple[str, str]], dict[str, int]] = {}
|
||||
for s in bucket_samples:
|
||||
bucket = s.labels["le"]
|
||||
labels_key = frozenset(_strip_label(s.labels, "le").items())
|
||||
if labels_key not in buckets_by_labels:
|
||||
buckets_by_labels[labels_key] = {}
|
||||
buckets_by_labels[labels_key][bucket] = int(s.value)
|
||||
|
||||
counts_by_labels: dict[frozenset[tuple[str, str]], int] = {}
|
||||
for s in count_samples:
|
||||
labels_key = frozenset(s.labels.items())
|
||||
counts_by_labels[labels_key] = int(s.value)
|
||||
|
||||
sums_by_labels: dict[frozenset[tuple[str, str]], float] = {}
|
||||
for s in sum_samples:
|
||||
labels_key = frozenset(s.labels.items())
|
||||
sums_by_labels[labels_key] = s.value
|
||||
|
||||
assert (
|
||||
set(buckets_by_labels.keys())
|
||||
== set(counts_by_labels.keys())
|
||||
== set(sums_by_labels.keys())
|
||||
)
|
||||
|
||||
output = []
|
||||
label_keys = list(buckets_by_labels.keys())
|
||||
for k in label_keys:
|
||||
labels = dict(k)
|
||||
output.append(
|
||||
(labels, buckets_by_labels[k], counts_by_labels[k], sums_by_labels[k])
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _digest_num_accepted_by_pos_samples(
|
||||
samples: list[Sample],
|
||||
) -> list[tuple[dict[str, str], list[int]]]:
|
||||
#
|
||||
# In the case of DP, we have an indigestable
|
||||
# per-position-per-engine count as a list of
|
||||
# labelled samples
|
||||
#
|
||||
# samples (in):
|
||||
# labels = {pos: 0, idx: 0}, value = 10
|
||||
# labels = {pos: 1, idx: 0}, value = 7
|
||||
# labels = {pos: 2, idx: 0}, value = 2
|
||||
# labels = {pos: 0, idx: 1}, value = 5
|
||||
# labels = {pos: 1, idx: 1}, value = 3
|
||||
# labels = {pos: 2, idx: 1}, value = 1
|
||||
#
|
||||
# output: [
|
||||
# {idx: 0}, [10, 7, 2]
|
||||
# {idx: 1}, [5, 3, 1]
|
||||
# ]
|
||||
#
|
||||
max_pos = 0
|
||||
values_by_labels: dict[frozenset[tuple[str, str]], dict[int, int]] = {}
|
||||
|
||||
for s in samples:
|
||||
position = int(s.labels["position"])
|
||||
max_pos = max(max_pos, position)
|
||||
|
||||
labels_key = frozenset(_strip_label(s.labels, "position").items())
|
||||
if labels_key not in values_by_labels:
|
||||
values_by_labels[labels_key] = {}
|
||||
values_by_labels[labels_key][position] = int(s.value)
|
||||
|
||||
output = []
|
||||
for labels_key, values_by_position in values_by_labels.items():
|
||||
labels = dict(labels_key)
|
||||
values = [0] * (max_pos + 1)
|
||||
for pos, val in values_by_position.items():
|
||||
values[pos] = val
|
||||
output.append((labels, values))
|
||||
return output
|
||||
Reference in New Issue
Block a user