import time
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Optional

from vllm.v1.spec_decode.metrics import SpecDecodingStats
from vllm.v1.metrics.stats import PrefixCacheStats


@dataclass
class SchedulerStats:
    """Stats associated with the scheduler.

    Every field has a default, so ``SchedulerStats()`` can be constructed
    without arguments. Field order is part of the positional ``__init__``
    signature generated by ``@dataclass`` and must not be rearranged.
    """

    # NOTE(review): presumably the number of requests currently running /
    # waiting in the scheduler -- confirm against the producer of these stats.
    num_running_reqs: int = 0
    num_waiting_reqs: int = 0

    # Defaults to None (not an empty list), so consumers must handle the
    # "not populated" case. The annotation is Optional to match that default;
    # the default itself is intentionally left as None so existing
    # ``is None`` checks keep working.
    # NOTE(review): presumably one sequence length per running request --
    # confirm against the code that fills this in.
    running_seqlens: Optional[list[int]] = None

    # These are used for internal DP load-balancing.
    step_counter: int = 0
    current_wave: int = 0

    # NOTE(review): looks like a usage ratio of the KV cache; the range/units
    # are not visible here -- confirm.
    kv_cache_usage: float = 0.0

    # default_factory gives each instance its own PrefixCacheStats object
    # instead of sharing a single one across all instances.
    prefix_cache_stats: PrefixCacheStats = field(
        default_factory=PrefixCacheStats)

    # Optional sub-stats; both default to None when not provided.
    spec_decoding_stats: Optional[SpecDecodingStats] = None
    kv_connector_stats: Optional[dict[str, Any]] = None

    num_corrupted_reqs: int = 0