32 lines
792 B
Python
32 lines
792 B
Python
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from typing import TYPE_CHECKING, Any, Optional
|
|
|
|
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
|
from vllm.v1.metrics.stats import PrefixCacheStats
|
|
|
|
|
|
@dataclass
class SchedulerStats:
    """Stats associated with the scheduler.

    Every field carries a default, so ``SchedulerStats()`` can be built
    with no arguments and populated afterwards.
    """

    num_running_reqs: int = 0
    num_waiting_reqs: int = 0

    # The default is None (not an empty list), so the annotation must be
    # Optional; readers of this field have to handle the None case.
    running_seqlens: Optional[list[int]] = None

    # These are used for internal DP load-balancing.
    step_counter: int = 0
    current_wave: int = 0

    kv_cache_usage: float = 0.0

    # default_factory builds a separate PrefixCacheStats per instance
    # instead of sharing one object across all SchedulerStats instances.
    prefix_cache_stats: PrefixCacheStats = field(
        default_factory=PrefixCacheStats)

    spec_decoding_stats: Optional[SpecDecodingStats] = None
    kv_connector_stats: Optional[dict[str, Any]] = None

    num_corrupted_reqs: int = 0
|
|
|