Sync from v0.13

2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions
--- a/vllm/logging_utils/init.py
+++ b/vllm/logging_utils/init.py
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.logging_utils.formatter import ColoredFormatter, NewLineFormatter
+from vllm.logging_utils.lazy import lazy
+from vllm.logging_utils.log_time import logtime
+
+# Public names exported by this module (the set picked up by a star import).
+__all__ = [
+    "NewLineFormatter",
+    "ColoredFormatter",
+    "lazy",
+    "logtime",
+]
--- a/vllm/logging_utils/dump_input.py
+++ b/vllm/logging_utils/dump_input.py
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import contextlib
+import enum
+import json
+
+import torch
+
+from vllm.config import VllmConfig
+from vllm.logger import init_logger
+from vllm.v1.core.sched.output import SchedulerOutput
+from vllm.v1.metrics.stats import SchedulerStats
+from vllm.version import __version__ as VLLM_VERSION
+
+# Module-level logger through which the engine dump helpers emit their output.
+logger = init_logger(__name__)
+
+
+def prepare_object_to_dump(obj) -> str:
+    """Render ``obj`` as a compact, single-line string suitable for logging.
+
+    The rendering is recursive and chosen by type, checked in this order:
+
+    - ``str``: wrapped in single quotes.
+    - ``dict``: ``{key: value, ...}`` in insertion order.
+    - ``list`` / ``set`` / ``tuple``: ``[item, ...]``.
+    - ``enum.Enum``: its ``repr``.
+    - ``torch.Tensor``: shape, device and dtype only (never the values).
+    - objects exposing ``anon_repr()``: whatever that method returns.
+    - objects with a ``__dict__``: ``TypeName(attr=value, ...)``.
+    - anything else: ``json.dumps(obj)``, falling back to ``repr(obj)``.
+
+    Args:
+        obj: The object to render.
+
+    Returns:
+        The string representation described above.
+    """
+    if isinstance(obj, str):
+        # Strings are wrapped in single quotes.
+        return f"'{obj}'"
+    if isinstance(obj, dict):
+        # Build a list, not a set: a set would scramble the key order and
+        # silently drop entries that happen to render to the same text.
+        dict_str = ", ".join(
+            [f"{str(k)}: {prepare_object_to_dump(v)}" for k, v in obj.items()]
+        )
+        return f"{{{dict_str}}}"
+    if isinstance(obj, (list, set, tuple)):
+        # All three sequence-like containers share the bracketed rendering.
+        return f"[{', '.join([prepare_object_to_dump(v) for v in obj])}]"
+    if isinstance(obj, enum.Enum):
+        return repr(obj)
+    if isinstance(obj, torch.Tensor):
+        # We only print the 'draft' of the tensor to not expose sensitive data
+        # and to get some metadata in case of CUDA runtime crashed
+        return f"Tensor(shape={obj.shape}, device={obj.device},dtype={obj.dtype})"
+    if hasattr(obj, "anon_repr"):
+        # The object supplies its own anonymised representation.
+        return obj.anon_repr()
+    if hasattr(obj, "__dict__"):
+        dict_str = ", ".join(
+            [f"{str(k)}={prepare_object_to_dump(v)}" for k, v in obj.__dict__.items()]
+        )
+        return f"{type(obj).__name__}({dict_str})"
+
+    # Hacky way to make sure we can serialize the object in JSON format
+    try:
+        return json.dumps(obj)
+    except (TypeError, OverflowError):
+        return repr(obj)
+
+
+def dump_engine_exception(
+    config: VllmConfig,
+    scheduler_output: SchedulerOutput,
+    scheduler_stats: SchedulerStats | None,
+):
+    """Best-effort logging of engine state; never raises an ``Exception``.
+
+    Forwards all arguments to ``_dump_engine_exception`` and discards any
+    ``Exception`` it raises.
+
+    Args:
+        config: Engine configuration to include in the dump.
+        scheduler_output: Scheduler output to include in the dump.
+        scheduler_stats: Optional scheduler statistics.
+    """
+    # NOTE: the dump is purely diagnostic, so a failure while producing it
+    # must not raise a new, unexpected error of its own.
+    with contextlib.suppress(Exception):
+        _dump_engine_exception(
+            config=config,
+            scheduler_output=scheduler_output,
+            scheduler_stats=scheduler_stats,
+        )
+
+
+def _dump_engine_exception(
+    config: VllmConfig,
+    scheduler_output: SchedulerOutput,
+    scheduler_stats: SchedulerStats | None,
+):
+    """Log the engine version, config, scheduler output and scheduler stats.
+
+    Everything is emitted at ERROR level. The scheduler output is rendered
+    with ``prepare_object_to_dump``; the stats, when truthy, are logged as-is.
+    If rendering or logging the scheduler data fails, the failure is logged
+    with its traceback instead of being propagated.
+
+    Args:
+        config: Engine configuration to log.
+        scheduler_output: Scheduler output to render and log.
+        scheduler_stats: Optional scheduler statistics.
+    """
+    logger.error(
+        "Dumping input data for V1 LLM engine (v%s) with config: %s, ",
+        VLLM_VERSION,
+        config,
+    )
+    try:
+        rendered_output = prepare_object_to_dump(scheduler_output)
+        logger.error(
+            "Dumping scheduler output for model execution: %s", rendered_output
+        )
+        if scheduler_stats:
+            logger.error("Dumping scheduler stats: %s", scheduler_stats)
+    except Exception:
+        # Record why the dump itself failed rather than raising from here.
+        logger.exception("Error preparing object to dump")
--- a/vllm/logging_utils/formatter.py
+++ b/vllm/logging_utils/formatter.py
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import logging
+from pathlib import Path
+
+from vllm import envs
+
+
+class NewLineFormatter(logging.Formatter):
+    """Adds logging prefix to newlines to align multi-line messages.
+
+    Before formatting, each record is given a ``fileinfo`` attribute for use
+    in the format string: the plain file name by default, or a shortened
+    path relative to ``root_dir`` when ``VLLM_LOGGING_LEVEL`` is ``"DEBUG"``.
+    """
+
+    def __init__(self, fmt, datefmt=None, style="%"):
+        super().__init__(fmt, datefmt, style)
+
+        # Relative paths are only computed when debug logging is requested.
+        self.use_relpath = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+        if self.use_relpath:
+            # Third ancestor of this file; for vllm/logging_utils/formatter.py
+            # that is the directory containing the top-level ``vllm`` folder.
+            self.root_dir = Path(__file__).resolve().parent.parent.parent
+
+    @staticmethod
+    def _shrink_path(relpath: Path) -> str:
+        """
+        Shortens a file path for logging display:
+        - Removes leading 'vllm' folder if present.
+        - If path starts with 'v1', keeps the first two levels;
+          otherwise keeps the first level.
+        - If more than two levels remain after that, keeps only the
+          last two of them and collapses the rest as '...'.
+        - If the path is short, returns it as-is.
+        - Examples:
+        vllm/model_executor/layers/quantization/utils/fp8_utils.py ->
+        model_executor/.../utils/fp8_utils.py
+        vllm/model_executor/layers/quantization/awq.py ->
+        model_executor/.../quantization/awq.py
+        vllm/v1/attention/backends/mla/common.py ->
+        v1/attention/.../mla/common.py
+        vllm/v1/worker/gpu_worker.py ->
+        v1/worker/gpu_worker.py
+
+        Args:
+            relpath (Path): The relative path to be shortened.
+        Returns:
+            str: The shortened path string for display.
+        """
+        parts = list(relpath.parts)
+        new_parts = []
+        if parts and parts[0] == "vllm":
+            parts = parts[1:]
+        if parts and parts[0] == "v1":
+            new_parts += parts[:2]
+            parts = parts[2:]
+        elif parts:
+            new_parts += parts[:1]
+            parts = parts[1:]
+        if len(parts) > 2:
+            new_parts += ["..."] + parts[-2:]
+        else:
+            new_parts += parts
+        return "/".join(new_parts)
+
+    def format(self, record):
+        """Format ``record`` and repeat the log prefix on continuation lines.
+
+        Sets ``record.fileinfo`` first, then delegates to the base formatter.
+        Every newline in the result is replaced by ``\\r\\n`` followed by
+        the text that precedes the message, so wrapped lines stay aligned.
+        """
+        if self.use_relpath:
+            abs_path = getattr(record, "pathname", None)
+            if abs_path:
+                try:
+                    relpath = Path(abs_path).resolve().relative_to(self.root_dir)
+                except Exception:
+                    # Files outside root_dir fall back to the bare file name.
+                    relpath = Path(record.filename)
+            else:
+                relpath = Path(record.filename)
+            record.fileinfo = self._shrink_path(relpath)
+        else:
+            record.fileinfo = record.filename
+
+        msg = super().format(record)
+        if record.message != "":
+            # Everything before the first occurrence of the message is the
+            # prefix (timestamp, level, file info, ...).
+            prefix, _, _ = msg.partition(record.message)
+            msg = msg.replace("\n", "\r\n" + prefix)
+        return msg
+
+
+class ColoredFormatter(NewLineFormatter):
+    """Adds ANSI color codes to log levels for terminal output.
+
+    This formatter adds colors by injecting them into the format string for
+    static elements (timestamp, filename, line number) and modifying the
+    levelname attribute for dynamic color selection.
+    """
+
+    # ANSI color codes
+    COLORS = {
+        "DEBUG": "\033[37m",  # White
+        "INFO": "\033[32m",  # Green
+        "WARNING": "\033[33m",  # Yellow
+        "ERROR": "\033[31m",  # Red
+        "CRITICAL": "\033[35m",  # Magenta
+    }
+    GREY = "\033[90m"  # Grey for timestamp and file info
+    RESET = "\033[0m"
+
+    def __init__(self, fmt, datefmt=None, style="%"):
+        # Inject grey color codes into format string for timestamp and file info
+        if fmt:
+            # Wrap %(asctime)s with grey
+            fmt = fmt.replace("%(asctime)s", f"{self.GREY}%(asctime)s{self.RESET}")
+            # Wrap [%(fileinfo)s:%(lineno)d] with grey
+            fmt = fmt.replace(
+                "[%(fileinfo)s:%(lineno)d]",
+                f"{self.GREY}[%(fileinfo)s:%(lineno)d]{self.RESET}",
+            )
+
+        # Call parent __init__ with potentially modified format string
+        super().__init__(fmt, datefmt, style)
+
+    def format(self, record):
+        """Format ``record`` with its level name wrapped in a color code.
+
+        Level names missing from ``COLORS`` are left uncolored. The record's
+        original ``levelname`` is always restored before returning, even when
+        formatting raises, so other handlers sharing the record never see the
+        ANSI escape sequences.
+        """
+        # Store original levelname to restore later (in case record is reused)
+        orig_levelname = record.levelname
+
+        # Only modify levelname - it needs dynamic color based on severity
+        if (color_code := self.COLORS.get(record.levelname)) is not None:
+            record.levelname = f"{color_code}{record.levelname}{self.RESET}"
+
+        try:
+            # Call parent format which will handle everything else
+            return super().format(record)
+        finally:
+            # Restore original levelname, including on the error path
+            record.levelname = orig_levelname
--- a/vllm/logging_utils/lazy.py
+++ b/vllm/logging_utils/lazy.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Callable
+from typing import Any
+
+
+class lazy:
+    """Defer a computation until a log message is actually rendered.
+
+    Wraps a zero-argument callable. The callable is invoked each time the
+    wrapper is converted with ``str()`` or ``repr()``, and its result is
+    returned as a string; nothing is called at construction time.
+    """
+
+    __slots__ = ("_factory",)
+
+    def __init__(self, factory: Callable[[], Any]) -> None:
+        self._factory = factory
+
+    def __str__(self) -> str:
+        produced = self._factory()
+        return str(produced)
+
+    def __repr__(self) -> str:
+        # ``repr`` renders exactly the same text as ``str``.
+        return self.__str__()
--- a/vllm/logging_utils/log_time.py
+++ b/vllm/logging_utils/log_time.py
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Provides a decorator that logs a function's execution time.
+"""
+
+import functools
+import time
+
+
+def logtime(logger, msg=None):
+    """
+    Decorator factory that reports how long each call of a function takes.
+
+    After the decorated function returns, the elapsed wall-clock time is
+    logged through ``logger.debug``. The log line is prefixed with ``msg``
+    when given, otherwise with the function's module and qualified name.
+    Nothing is logged if the function raises.
+    Always place it beneath other decorators.
+    """
+
+    def _decorate(target):
+        @functools.wraps(target)
+        def _timed(*args, **kwargs):
+            began = time.perf_counter()
+            outcome = target(*args, **kwargs)
+            duration = time.perf_counter() - began
+
+            if msg is None:
+                label = f"Function '{target.__module__}.{target.__qualname__}'"
+            else:
+                label = msg
+            logger.debug("%s: Elapsed time %.7f secs", label, duration)
+            return outcome
+
+        return _timed
+
+    return _decorate