init
This commit is contained in:
0
vllm_vacc/vllm/v1/metrics/__init__.py
Normal file
0
vllm_vacc/vllm/v1/metrics/__init__.py
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/loggers.cpython-312.pyc
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/loggers.cpython-312.pyc
Normal file
Binary file not shown.
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/stats.cpython-312.pyc
Normal file
BIN
vllm_vacc/vllm/v1/metrics/__pycache__/stats.cpython-312.pyc
Normal file
Binary file not shown.
49
vllm_vacc/vllm/v1/metrics/loggers.py
Normal file
49
vllm_vacc/vllm/v1/metrics/loggers.py
Normal file
@@ -0,0 +1,49 @@
|
||||
|
||||
import time
|
||||
|
||||
from vllm.v1.metrics.loggers import StatLoggerBase
|
||||
from vllm.v1.metrics.loggers import logger
|
||||
|
||||
class LoggingStatLogger(StatLoggerBase):
    """Human-readable periodic stats logger for a single engine."""

    def log(self):
        """Emit one summary line of throughput and scheduler stats.

        The line is demoted from INFO to DEBUG when the engine is idle
        (current and previous throughputs are all zero) so an idle
        production system does not spam the log. The level is chosen
        *before* the "last" throughput values are overwritten, so the
        first report after activity stops is still logged at INFO.
        """
        now = time.monotonic()

        # Throughputs over the interval since the previous reset.
        prompt_tps = self._get_throughput(self.num_prompt_tokens, now)
        gen_tps = self._get_throughput(self.num_generation_tokens, now)
        self._reset(now)

        sched = self.last_scheduler_stats

        # Avoid log noise on an idle production system.
        is_idle = not any(
            (prompt_tps, gen_tps,
             self.last_prompt_throughput, self.last_generation_throughput))
        log_fn = logger.debug if is_idle else logger.info

        self.last_generation_throughput = gen_tps
        self.last_prompt_throughput = prompt_tps

        # Format and print output (lazy %-style args keep this cheap at
        # DEBUG level).
        log_fn(
            "Engine %03d: "
            "Avg prompt throughput: %.1f tokens/s, "
            "Avg generation throughput: %.1f tokens/s, "
            "Running: %d reqs, Waiting: %d reqs, "
            "GPU KV cache usage: %.1f%%, "
            "Prefix cache hit rate: %.1f%%, "
            "running seqlens: %s ",
            self.engine_index,
            prompt_tps,
            gen_tps,
            sched.num_running_reqs,
            sched.num_waiting_reqs,
            sched.kv_cache_usage * 100,
            self.prefix_caching_metrics.hit_rate * 100,
            str(sched.running_seqlens),
        )
        self.spec_decoding_logging.log(log_fn=log_fn)
        self.kv_transfer_logging.log(log_fn=log_fn)
|
||||
|
||||
32
vllm_vacc/vllm/v1/metrics/stats.py
Normal file
32
vllm_vacc/vllm/v1/metrics/stats.py
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
||||
from vllm.v1.metrics.stats import PrefixCacheStats
|
||||
|
||||
|
||||
@dataclass
class SchedulerStats:
    """Stats associated with the scheduler.

    A point-in-time snapshot published by the scheduler; consumed by the
    metrics loggers (e.g. ``LoggingStatLogger``).
    """

    # Queue sizes at snapshot time.
    num_running_reqs: int = 0
    num_waiting_reqs: int = 0

    # Sequence lengths of the currently running requests.
    # FIX: annotated Optional — the default is None, not a list, so the
    # previous ``list[int] = None`` annotation was incorrect. Runtime
    # default is unchanged for backward compatibility.
    running_seqlens: Optional[list[int]] = None

    # These are used for internal DP load-balancing.
    step_counter: int = 0
    current_wave: int = 0

    # KV cache usage; presumably a fraction in [0.0, 1.0] — the logger
    # multiplies by 100 for percentage display.
    kv_cache_usage: float = 0.0

    # Mutable default must go through default_factory (shared-state rule
    # for dataclass fields).
    prefix_cache_stats: PrefixCacheStats = field(
        default_factory=PrefixCacheStats)

    # Optional per-feature stats; None when the feature is inactive.
    spec_decoding_stats: Optional[SpecDecodingStats] = None
    kv_connector_stats: Optional[dict[str, Any]] = None

    num_corrupted_reqs: int = 0
|
||||
|
||||
Reference in New Issue
Block a user