init
This commit is contained in:
0
vllm_vacc/vllm/v1/metrics/__init__.py
Normal file
0
vllm_vacc/vllm/v1/metrics/__init__.py
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/loggers.cpython-312.pyc
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/loggers.cpython-312.pyc
Normal file
Binary file not shown.
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/stats.cpython-312.pyc
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/stats.cpython-312.pyc
Normal file
Binary file not shown.
49
vllm_vacc/vllm/v1/metrics/loggers.py
Normal file
49
vllm_vacc/vllm/v1/metrics/loggers.py
Normal file
@@ -0,0 +1,49 @@
|
||||
|
||||
import time
|
||||
|
||||
from vllm.v1.metrics.loggers import StatLoggerBase
|
||||
from vllm.v1.metrics.loggers import logger
|
||||
|
||||
class LoggingStatLogger(StatLoggerBase):
    """Human-readable periodic stats logger for a single engine."""

    def log(self):
        """Emit one summary line of throughput and scheduler stats.

        The line is demoted from INFO to DEBUG when the engine is idle
        (current and previous throughputs are all zero) so an idle
        production system does not spam the log. The level is chosen
        *before* the "last" throughput values are overwritten, so the
        first report after activity stops is still logged at INFO.
        """
        now = time.monotonic()

        # Throughputs over the interval since the previous reset.
        prompt_tps = self._get_throughput(self.num_prompt_tokens, now)
        gen_tps = self._get_throughput(self.num_generation_tokens, now)
        self._reset(now)

        sched = self.last_scheduler_stats

        # Avoid log noise on an idle production system.
        is_idle = not any(
            (prompt_tps, gen_tps,
             self.last_prompt_throughput, self.last_generation_throughput))
        log_fn = logger.debug if is_idle else logger.info

        self.last_generation_throughput = gen_tps
        self.last_prompt_throughput = prompt_tps

        # Format and print output (lazy %-style args keep this cheap at
        # DEBUG level).
        log_fn(
            "Engine %03d: "
            "Avg prompt throughput: %.1f tokens/s, "
            "Avg generation throughput: %.1f tokens/s, "
            "Running: %d reqs, Waiting: %d reqs, "
            "GPU KV cache usage: %.1f%%, "
            "Prefix cache hit rate: %.1f%%, "
            "running seqlens: %s ",
            self.engine_index,
            prompt_tps,
            gen_tps,
            sched.num_running_reqs,
            sched.num_waiting_reqs,
            sched.kv_cache_usage * 100,
            self.prefix_caching_metrics.hit_rate * 100,
            str(sched.running_seqlens),
        )
        self.spec_decoding_logging.log(log_fn=log_fn)
        self.kv_transfer_logging.log(log_fn=log_fn)
|
||||
|
||||
32
vllm_vacc/vllm/v1/metrics/stats.py
Normal file
32
vllm_vacc/vllm/v1/metrics/stats.py
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
||||
from vllm.v1.metrics.stats import PrefixCacheStats
|
||||
|
||||
|
||||
@dataclass
class SchedulerStats:
    """Stats associated with the scheduler.

    A point-in-time snapshot published by the scheduler; consumed by the
    metrics loggers (e.g. ``LoggingStatLogger``).
    """

    # Queue sizes at snapshot time.
    num_running_reqs: int = 0
    num_waiting_reqs: int = 0

    # Sequence lengths of the currently running requests.
    # FIX: annotated Optional — the default is None, not a list, so the
    # previous ``list[int] = None`` annotation was incorrect. Runtime
    # default is unchanged for backward compatibility.
    running_seqlens: Optional[list[int]] = None

    # These are used for internal DP load-balancing.
    step_counter: int = 0
    current_wave: int = 0

    # KV cache usage; presumably a fraction in [0.0, 1.0] — the logger
    # multiplies by 100 for percentage display.
    kv_cache_usage: float = 0.0

    # Mutable default must go through default_factory (shared-state rule
    # for dataclass fields).
    prefix_cache_stats: PrefixCacheStats = field(
        default_factory=PrefixCacheStats)

    # Optional per-feature stats; None when the feature is inactive.
    spec_decoding_stats: Optional[SpecDecodingStats] = None
    kv_connector_stats: Optional[dict[str, Any]] = None

    num_corrupted_reqs: int = 0
|
||||
|
||||
Reference in New Issue
Block a user