Sync from v0.13

This commit is contained in:
2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

View File

@@ -1,27 +1,51 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Logging configuration for vLLM."""
import datetime
import json
import logging
import os
import sys
from functools import partial
from collections.abc import Generator, Hashable
from contextlib import contextmanager
from functools import lru_cache, partial
from logging import Logger
from logging.config import dictConfig
from os import path
from typing import Dict, Optional
from types import MethodType
from typing import Any, Literal, cast
import vllm.envs as envs
VLLM_CONFIGURE_LOGGING = envs.VLLM_CONFIGURE_LOGGING
VLLM_LOGGING_CONFIG_PATH = envs.VLLM_LOGGING_CONFIG_PATH
_FORMAT = "%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s"
_FORMAT = (
f"{envs.VLLM_LOGGING_PREFIX}%(levelname)s %(asctime)s "
"[%(fileinfo)s:%(lineno)d] %(message)s"
)
_DATE_FORMAT = "%m-%d %H:%M:%S"
def _use_color() -> bool:
if envs.NO_COLOR or envs.VLLM_LOGGING_COLOR == "0":
return False
if envs.VLLM_LOGGING_COLOR == "1":
return True
if envs.VLLM_LOGGING_STREAM == "ext://sys.stdout": # stdout
return hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
elif envs.VLLM_LOGGING_STREAM == "ext://sys.stderr": # stderr
return hasattr(sys.stderr, "isatty") and sys.stderr.isatty()
return False
DEFAULT_LOGGING_CONFIG = {
"formatters": {
"vllm": {
"class": "vllm.logging.NewLineFormatter",
"class": "vllm.logging_utils.NewLineFormatter",
"datefmt": _DATE_FORMAT,
"format": _FORMAT,
},
"vllm_color": {
"class": "vllm.logging_utils.ColoredFormatter",
"datefmt": _DATE_FORMAT,
"format": _FORMAT,
},
@@ -29,59 +53,175 @@ DEFAULT_LOGGING_CONFIG = {
"handlers": {
"vllm": {
"class": "logging.StreamHandler",
"formatter": "vllm",
"level": "INFO",
"stream": "ext://sys.stdout",
# Choose formatter based on color setting.
"formatter": "vllm_color" if _use_color() else "vllm",
"level": envs.VLLM_LOGGING_LEVEL,
"stream": envs.VLLM_LOGGING_STREAM,
},
},
"loggers": {
"vllm": {
"handlers": ["vllm"],
"level": "DEBUG",
"level": envs.VLLM_LOGGING_LEVEL,
"propagate": False,
},
},
"version": 1,
"disable_existing_loggers": False,
}
@lru_cache
def _print_debug_once(logger: Logger, msg: str, *args: Hashable) -> None:
# Set the stacklevel to 3 to print the original caller's line info
logger.debug(msg, *args, stacklevel=3)
@lru_cache
def _print_info_once(logger: Logger, msg: str, *args: Hashable) -> None:
# Set the stacklevel to 3 to print the original caller's line info
logger.info(msg, *args, stacklevel=3)
@lru_cache
def _print_warning_once(logger: Logger, msg: str, *args: Hashable) -> None:
# Set the stacklevel to 3 to print the original caller's line info
logger.warning(msg, *args, stacklevel=3)
LogScope = Literal["process", "global", "local"]
def _should_log_with_scope(scope: LogScope) -> bool:
"""Decide whether to log based on scope"""
if scope == "global":
from vllm.distributed.parallel_state import is_global_first_rank
return is_global_first_rank()
if scope == "local":
from vllm.distributed.parallel_state import is_local_first_rank
return is_local_first_rank()
# default "process" scope: always log
return True
class _VllmLogger(Logger):
"""
Note:
This class is just to provide type information.
We actually patch the methods directly on the [`logging.Logger`][]
instance to avoid conflicting with other libraries such as
`intel_extension_for_pytorch.utils._logger`.
"""
def debug_once(
self, msg: str, *args: Hashable, scope: LogScope = "process"
) -> None:
"""
As [`debug`][logging.Logger.debug], but subsequent calls with
the same message are silently dropped.
"""
if not _should_log_with_scope(scope):
return
_print_debug_once(self, msg, *args)
def info_once(self, msg: str, *args: Hashable, scope: LogScope = "process") -> None:
"""
As [`info`][logging.Logger.info], but subsequent calls with
the same message are silently dropped.
"""
if not _should_log_with_scope(scope):
return
_print_info_once(self, msg, *args)
def warning_once(
self, msg: str, *args: Hashable, scope: LogScope = "process"
) -> None:
"""
As [`warning`][logging.Logger.warning], but subsequent calls with
the same message are silently dropped.
"""
if not _should_log_with_scope(scope):
return
_print_warning_once(self, msg, *args)
# Pre-defined methods mapping to avoid repeated dictionary creation
_METHODS_TO_PATCH = {
"debug_once": _VllmLogger.debug_once,
"info_once": _VllmLogger.info_once,
"warning_once": _VllmLogger.warning_once,
}
def _configure_vllm_root_logger() -> None:
logging_config: Optional[Dict] = None
logging_config = dict[str, Any]()
if not VLLM_CONFIGURE_LOGGING and VLLM_LOGGING_CONFIG_PATH:
if not envs.VLLM_CONFIGURE_LOGGING and envs.VLLM_LOGGING_CONFIG_PATH:
raise RuntimeError(
"VLLM_CONFIGURE_LOGGING evaluated to false, but "
"VLLM_LOGGING_CONFIG_PATH was given. VLLM_LOGGING_CONFIG_PATH "
"implies VLLM_CONFIGURE_LOGGING. Please enable "
"VLLM_CONFIGURE_LOGGING or unset VLLM_LOGGING_CONFIG_PATH.")
"VLLM_CONFIGURE_LOGGING or unset VLLM_LOGGING_CONFIG_PATH."
)
if VLLM_CONFIGURE_LOGGING:
if envs.VLLM_CONFIGURE_LOGGING:
logging_config = DEFAULT_LOGGING_CONFIG
if VLLM_LOGGING_CONFIG_PATH:
if not path.exists(VLLM_LOGGING_CONFIG_PATH):
vllm_handler = logging_config["handlers"]["vllm"]
# Refresh these values in case env vars have changed.
vllm_handler["level"] = envs.VLLM_LOGGING_LEVEL
vllm_handler["stream"] = envs.VLLM_LOGGING_STREAM
vllm_handler["formatter"] = "vllm_color" if _use_color() else "vllm"
vllm_loggers = logging_config["loggers"]["vllm"]
vllm_loggers["level"] = envs.VLLM_LOGGING_LEVEL
if envs.VLLM_LOGGING_CONFIG_PATH:
if not path.exists(envs.VLLM_LOGGING_CONFIG_PATH):
raise RuntimeError(
"Could not load logging config. File does not exist: %s",
VLLM_LOGGING_CONFIG_PATH)
with open(VLLM_LOGGING_CONFIG_PATH, encoding="utf-8",
mode="r") as file:
envs.VLLM_LOGGING_CONFIG_PATH,
)
with open(envs.VLLM_LOGGING_CONFIG_PATH, encoding="utf-8") as file:
custom_config = json.loads(file.read())
if not isinstance(custom_config, dict):
raise ValueError("Invalid logging config. Expected Dict, got %s.",
type(custom_config).__name__)
raise ValueError(
"Invalid logging config. Expected dict, got %s.",
type(custom_config).__name__,
)
logging_config = custom_config
for formatter in logging_config.get("formatters", {}).values():
# This provides backwards compatibility after #10134.
if formatter.get("class") == "vllm.logging.NewLineFormatter":
formatter["class"] = "vllm.logging_utils.NewLineFormatter"
if logging_config:
dictConfig(logging_config)
def init_logger(name: str) -> Logger:
def init_logger(name: str) -> _VllmLogger:
"""The main purpose of this function is to ensure that loggers are
retrieved in such a way that we can be sure the root vllm logger has
already been configured."""
return logging.getLogger(name)
logger = logging.getLogger(name)
for method_name, method in _METHODS_TO_PATCH.items():
setattr(logger, method_name, MethodType(method, logger))
return cast(_VllmLogger, logger)
@contextmanager
def suppress_logging(level: int = logging.INFO) -> Generator[None, Any, None]:
current_level = logging.root.manager.disable
logging.disable(level)
yield
logging.disable(current_level)
# The root logger is initialized when the module is imported.
@@ -89,11 +229,16 @@ def init_logger(name: str) -> Logger:
# guaranteed by the Python GIL.
_configure_vllm_root_logger()
# Transformers uses httpx to access the Hugging Face Hub. httpx is quite verbose,
# so we set its logging level to WARNING when vLLM's logging level is INFO.
if envs.VLLM_LOGGING_LEVEL == "INFO":
logging.getLogger("httpx").setLevel(logging.WARNING)
logger = init_logger(__name__)
def _trace_calls(log_path, root_dir, frame, event, arg=None):
if event in ['call', 'return']:
if event in ["call", "return"]:
# Extract the filename, line number, function name, and the code object
filename = frame.f_code.co_filename
lineno = frame.f_lineno
@@ -113,25 +258,29 @@ def _trace_calls(log_path, root_dir, frame, event, arg=None):
last_filename = ""
last_lineno = 0
last_func_name = ""
with open(log_path, 'a') as f:
if event == 'call':
f.write(f"{datetime.datetime.now()} Call to"
f" {func_name} in {filename}:{lineno}"
f" from {last_func_name} in {last_filename}:"
f"{last_lineno}\n")
with open(log_path, "a") as f:
ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
if event == "call":
f.write(
f"{ts} Call to"
f" {func_name} in {filename}:{lineno}"
f" from {last_func_name} in {last_filename}:"
f"{last_lineno}\n"
)
else:
f.write(f"{datetime.datetime.now()} Return from"
f" {func_name} in {filename}:{lineno}"
f" to {last_func_name} in {last_filename}:"
f"{last_lineno}\n")
f.write(
f"{ts} Return from"
f" {func_name} in {filename}:{lineno}"
f" to {last_func_name} in {last_filename}:"
f"{last_lineno}\n"
)
except NameError:
# modules are deleted during shutdown
pass
return partial(_trace_calls, log_path, root_dir)
def enable_trace_function_call(log_file_path: str,
root_dir: Optional[str] = None):
def enable_trace_function_call(log_file_path: str, root_dir: str | None = None):
"""
Enable tracing of every function call in code under `root_dir`.
This is useful for debugging hangs or crashes.
@@ -145,7 +294,8 @@ def enable_trace_function_call(log_file_path: str,
logger.warning(
"VLLM_TRACE_FUNCTION is enabled. It will record every"
" function executed by Python. This will slow down the code. It "
"is suggested to be used for debugging hang or crashes only.")
"is suggested to be used for debugging hang or crashes only."
)
logger.info("Trace frame log is saved to %s", log_file_path)
if root_dir is None:
# by default, this is the vllm root directory