Sync from v0.13

This commit is contained in:
2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

View File

@@ -0,0 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.logging_utils.formatter import ColoredFormatter, NewLineFormatter
from vllm.logging_utils.lazy import lazy
from vllm.logging_utils.log_time import logtime
# Names exported by a star-import of this module; each one is imported above.
__all__ = [
    "NewLineFormatter",
    "ColoredFormatter",
    "lazy",
    "logtime",
]

View File

@@ -0,0 +1,83 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import contextlib
import enum
import json
import torch
from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.metrics.stats import SchedulerStats
from vllm.version import __version__ as VLLM_VERSION
# Module-level logger; the engine-dump helpers below emit through it.
logger = init_logger(__name__)
def prepare_object_to_dump(obj) -> str:
    """Render ``obj`` as a compact, log-friendly string.

    The rendering is recursive and chosen by type:

    - ``str``: wrapped in single quotes.
    - ``dict``: ``{key: value, ...}`` in the dict's own iteration order.
    - ``list`` / ``set`` / ``tuple``: ``[item, ...]``.
    - ``enum.Enum``: its ``repr``.
    - ``torch.Tensor``: shape, device and dtype only, never the contents.
    - objects exposing ``anon_repr``: whatever ``anon_repr()`` returns.
    - objects with a ``__dict__``: ``TypeName(attr=value, ...)``.
    - anything else: ``json.dumps(obj)``, falling back to ``repr(obj)``.

    Args:
        obj: The object to render.

    Returns:
        The string representation described above.
    """
    if isinstance(obj, str):
        return f"'{obj}'"  # Wrap strings in single quotes
    elif isinstance(obj, dict):
        # Join a generator rather than a set comprehension: a set would
        # emit the entries in hash order (which varies between runs for
        # string keys) and would silently merge identical renderings.
        dict_str = ", ".join(
            f"{str(k)}: {prepare_object_to_dump(v)}" for k, v in obj.items()
        )
        return f"{{{dict_str}}}"
    elif isinstance(obj, (list, set, tuple)):
        # All three sequence-like containers share the bracketed rendering.
        return f"[{', '.join(prepare_object_to_dump(v) for v in obj)}]"
    elif isinstance(obj, enum.Enum):
        return repr(obj)
    elif isinstance(obj, torch.Tensor):
        # We only print the 'draft' of the tensor to not expose sensitive data
        # and to get some metadata in case of CUDA runtime crashed
        return f"Tensor(shape={obj.shape}, device={obj.device},dtype={obj.dtype})"
    elif hasattr(obj, "anon_repr"):
        return obj.anon_repr()
    elif hasattr(obj, "__dict__"):
        dict_str = ", ".join(
            f"{str(k)}={prepare_object_to_dump(v)}" for k, v in obj.__dict__.items()
        )
        return f"{type(obj).__name__}({dict_str})"
    else:
        # Hacky way to make sure we can serialize the object in JSON format
        try:
            return json.dumps(obj)
        except (TypeError, OverflowError):
            return repr(obj)
def dump_engine_exception(
    config: VllmConfig,
    scheduler_output: SchedulerOutput,
    scheduler_stats: SchedulerStats | None,
):
    """Log engine state for debugging without ever raising.

    Delegates to ``_dump_engine_exception`` and discards any ``Exception``
    it raises, so the extra diagnostics cannot introduce a new failure.

    Args:
        config: Engine configuration to include in the dump.
        scheduler_output: Scheduler output to include in the dump.
        scheduler_stats: Optional scheduler statistics to include.
    """
    # NOTE: the dump is best-effort; errors raised while logging the extra
    # information are deliberately swallowed here.
    swallow_errors = contextlib.suppress(Exception)
    with swallow_errors:
        _dump_engine_exception(config, scheduler_output, scheduler_stats)
def _dump_engine_exception(
    config: VllmConfig,
    scheduler_output: SchedulerOutput,
    scheduler_stats: SchedulerStats | None,
):
    """Emit the engine configuration and scheduler state at ERROR level.

    The version/config line is logged first. The scheduler output is then
    rendered with ``prepare_object_to_dump`` and logged, followed by the
    scheduler stats when they are truthy. Any exception raised while
    rendering or logging the scheduler data is reported through
    ``logger.exception`` instead of propagating.
    """
    logger.error(
        "Dumping input data for V1 LLM engine (v%s) with config: %s, ",
        VLLM_VERSION,
        config,
    )

    try:
        rendered_output = prepare_object_to_dump(scheduler_output)
        logger.error(
            "Dumping scheduler output for model execution: %s",
            rendered_output,
        )
        # Stats are optional; skip the line when none were supplied.
        if scheduler_stats:
            logger.error("Dumping scheduler stats: %s", scheduler_stats)
    except Exception:
        logger.exception("Error preparing object to dump")

View File

@@ -0,0 +1,127 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import logging
from pathlib import Path
from vllm import envs
class NewLineFormatter(logging.Formatter):
    """Adds logging prefix to newlines to align multi-line messages.

    Every record gets a ``fileinfo`` attribute for use in the format
    string: the plain file name normally, or a shortened path relative to
    ``root_dir`` when ``envs.VLLM_LOGGING_LEVEL`` is ``"DEBUG"``.
    """

    def __init__(self, fmt, datefmt=None, style="%"):
        super().__init__(fmt, datefmt, style)

        self.use_relpath = envs.VLLM_LOGGING_LEVEL == "DEBUG"
        if self.use_relpath:
            # Three levels above this file; paths are shown relative to it.
            self.root_dir = Path(__file__).resolve().parent.parent.parent

    @staticmethod
    def _shrink_path(relpath: Path) -> str:
        """
        Shortens a file path for logging display:
            - Removes leading 'vllm' folder if present.
            - If path starts with 'v1',
            keeps the first two and last two levels,
            collapsing the middle as '...'.
            - Otherwise, keeps the first and last two levels,
            collapsing the middle as '...'.
            - If the path is short, returns it as-is.
            - Examples:
            vllm/model_executor/layers/quantization/utils/fp8_utils.py ->
            model_executor/.../utils/fp8_utils.py
            vllm/model_executor/layers/quantization/awq.py ->
            model_executor/.../quantization/awq.py
            vllm/v1/attention/backends/mla/common.py ->
            v1/attention/.../mla/common.py
            vllm/v1/engine/core.py ->
            v1/engine/core.py

        Args:
            relpath (Path): The relative path to be shortened.

        Returns:
            str: The shortened path string for display.
        """
        parts = list(relpath.parts)
        new_parts = []
        if parts and parts[0] == "vllm":
            parts = parts[1:]
        if parts and parts[0] == "v1":
            new_parts += parts[:2]
            parts = parts[2:]
        elif parts:
            new_parts += parts[:1]
            parts = parts[1:]
        if len(parts) > 2:
            new_parts += ["..."] + parts[-2:]
        else:
            new_parts += parts
        return "/".join(new_parts)

    def format(self, record):
        """Format ``record``, repeating the log prefix on every extra line.

        Sets ``record.fileinfo`` first, then lets ``logging.Formatter``
        build the message. When the message is non-empty, each newline in
        the result is replaced by ``"\\r\\n"`` followed by the text that
        precedes the message, so continuation lines line up with the first.
        """
        if self.use_relpath:
            abs_path = getattr(record, "pathname", None)
            if abs_path:
                try:
                    relpath = Path(abs_path).resolve().relative_to(self.root_dir)
                except Exception:
                    # Not under root_dir (or unresolvable): use the bare name.
                    relpath = Path(record.filename)
            else:
                relpath = Path(record.filename)
            record.fileinfo = self._shrink_path(relpath)
        else:
            record.fileinfo = record.filename

        msg = super().format(record)
        if record.message != "":
            # Everything before the first occurrence of the message text is
            # treated as the prefix to repeat.
            parts = msg.split(record.message)
            msg = msg.replace("\n", "\r\n" + parts[0])
        return msg
class ColoredFormatter(NewLineFormatter):
    """Adds ANSI color codes to log levels for terminal output.

    This formatter adds colors by injecting them into the format string for
    static elements (timestamp, filename, line number) and modifying the
    levelname attribute for dynamic color selection.
    """

    # ANSI color codes
    COLORS = {
        "DEBUG": "\033[37m",  # White
        "INFO": "\033[32m",  # Green
        "WARNING": "\033[33m",  # Yellow
        "ERROR": "\033[31m",  # Red
        "CRITICAL": "\033[35m",  # Magenta
    }
    GREY = "\033[90m"  # Grey for timestamp and file info
    RESET = "\033[0m"

    def __init__(self, fmt, datefmt=None, style="%"):
        # Inject grey color codes into format string for timestamp and file info
        if fmt:
            # Wrap %(asctime)s with grey
            fmt = fmt.replace("%(asctime)s", f"{self.GREY}%(asctime)s{self.RESET}")

            # Wrap [%(fileinfo)s:%(lineno)d] with grey
            fmt = fmt.replace(
                "[%(fileinfo)s:%(lineno)d]",
                f"{self.GREY}[%(fileinfo)s:%(lineno)d]{self.RESET}",
            )

        # Call parent __init__ with potentially modified format string
        super().__init__(fmt, datefmt, style)

    def format(self, record):
        """Format ``record`` with its level name wrapped in a color code.

        The level name is colored only when it has an entry in ``COLORS``.
        The original ``levelname`` is always put back on the record, even
        if the parent formatter raises, because the same record object may
        be reused after this call.
        """
        # Store original levelname to restore later (in case record is reused)
        orig_levelname = record.levelname

        # Only modify levelname - it needs dynamic color based on severity
        if (color_code := self.COLORS.get(record.levelname)) is not None:
            record.levelname = f"{color_code}{record.levelname}{self.RESET}"

        try:
            # Call parent format which will handle everything else
            return super().format(record)
        finally:
            # Restore original levelname on both the success and error paths
            record.levelname = orig_levelname

View File

@@ -0,0 +1,20 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Callable
from typing import Any
class lazy:
    """Defer a zero-argument callable until the object is rendered as text.

    The callable is stored at construction time and invoked each time the
    instance is converted with ``str()`` or ``repr()``.
    """

    __slots__ = ("_factory",)

    def __init__(self, factory: Callable[[], Any]) -> None:
        self._factory = factory

    def __str__(self) -> str:
        # Evaluate now; the result is not cached between conversions.
        produced = self._factory()
        return str(produced)

    def __repr__(self) -> str:
        # repr deliberately mirrors str.
        return str(self)

View File

@@ -0,0 +1,34 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Provides a timeslice logging decorator
"""
import functools
import time
def logtime(logger, msg=None):
    """
    Build a decorator that reports how long the wrapped function took.

    After each successful call, the elapsed time is sent to ``logger.debug``.
    The log line is labelled with ``msg`` when it is given, otherwise with
    the function's module and qualified name.
    Always place it beneath other decorators.
    """

    def _decorate(func):
        @functools.wraps(func)
        def _timed(*args, **kwargs):
            began = time.perf_counter()
            outcome = func(*args, **kwargs)
            elapsed = time.perf_counter() - began

            if msg is None:
                label = f"Function '{func.__module__}.{func.__qualname__}'"
            else:
                label = msg

            logger.debug("%s: Elapsed time %.7f secs", label, elapsed)
            return outcome

        return _timed

    return _decorate