init src 0.9.2

This commit is contained in:
2026-01-09 15:09:53 +08:00
parent 0eb2c0a4b3
commit 41d98d4359
1438 changed files with 417605 additions and 683 deletions

View File

@@ -0,0 +1,14 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.triton_utils.importing import (HAS_TRITON, TritonLanguagePlaceholder,
TritonPlaceholder)
# Re-export the real Triton modules when Triton is importable; otherwise
# bind placeholder modules so downstream code doing
# `from vllm.triton_utils import triton, tl` keeps working without Triton
# (placeholder decorators are no-ops).
if HAS_TRITON:
    import triton
    import triton.language as tl
else:
    triton = TritonPlaceholder()
    tl = TritonLanguagePlaceholder()
# Public names re-exported by this package.
__all__ = ["HAS_TRITON", "triton", "tl"]

View File

@@ -0,0 +1,55 @@
# SPDX-License-Identifier: Apache-2.0
import os
from triton.runtime.cache import (FileCacheManager, default_cache_dir,
default_dump_dir, default_override_dir)
from vllm.logger import init_logger
logger = init_logger(__name__)
def maybe_set_triton_cache_manager() -> None:
    """Point Triton at vLLM's custom cache manager via environment variable.

    Sets ``TRITON_CACHE_MANAGER`` to ``CustomCacheManager`` so Triton uses a
    collision-safe cache directory layout. A user-provided value already
    present in the environment is respected and left untouched.
    """
    # Fixed typo: local variable was previously misspelled "cache_manger".
    cache_manager = os.environ.get("TRITON_CACHE_MANAGER", None)
    if cache_manager is None:
        manager = "vllm.triton_utils.custom_cache_manager:CustomCacheManager"
        logger.info("Setting Triton cache manager to: %s", manager)
        os.environ["TRITON_CACHE_MANAGER"] = manager
class CustomCacheManager(FileCacheManager):
    """Re-implements Triton's cache manager, ensuring that a
    unique cache directory is created for each process. This is
    needed to avoid collisions when running with tp>1 and
    using multi-processing as the distributed backend.
    Note this issue was fixed by triton-lang/triton/pull/4295,
    but the fix is not yet included in triton==v3.0.0. However,
    it should be included in the subsequent version.
    """

    def __init__(self, key, override=False, dump=False):
        # key: Triton's kernel hash, used as the leaf directory name.
        self.key = key
        self.lock_path = None
        if dump:
            # Kernel-dump mode: store under Triton's dump directory.
            self.cache_dir = default_dump_dir()
            self.cache_dir = os.path.join(self.cache_dir, self.key)
            self.lock_path = os.path.join(self.cache_dir, "lock")
            os.makedirs(self.cache_dir, exist_ok=True)
        elif override:
            # Override mode: lookup-only directory; no lock and no makedirs,
            # matching the dump/default branches' asymmetry in upstream Triton.
            self.cache_dir = default_override_dir()
            self.cache_dir = os.path.join(self.cache_dir, self.key)
        else:
            # create cache directory if it doesn't exist
            self.cache_dir = os.getenv("TRITON_CACHE_DIR",
                                       "").strip() or default_cache_dir()
            if self.cache_dir:
                # NOTE(review): the per-process pid suffix that the class
                # docstring promises is commented out below, so the cache
                # directory is currently shared across processes — confirm
                # whether this is intentional.
                # self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
                self.cache_dir = os.path.join(self.cache_dir, self.key)
                self.lock_path = os.path.join(self.cache_dir, "lock")
                os.makedirs(self.cache_dir, exist_ok=True)
            else:
                raise RuntimeError("Could not create or locate cache dir")

View File

@@ -0,0 +1,94 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import types
from importlib.util import find_spec
from vllm.logger import init_logger
logger = init_logger(__name__)
# True when a Triton distribution appears importable in this environment.
# NOTE(review): "pytorch-triton-xpu" is a pip distribution name, not an
# importable module name, so find_spec on that string will not locate the
# package — confirm whether this second check can ever be True.
HAS_TRITON = (
    find_spec("triton") is not None
    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
)

# Triton being installed is not enough: it also needs at least one active
# backend driver. Probe the drivers and downgrade HAS_TRITON when the
# environment cannot actually run Triton kernels.
if HAS_TRITON:
    try:
        from triton.backends import backends
        # It's generally expected that x.driver exists and has
        # an is_active method.
        # The `x.driver and` check adds a small layer of safety.
        active_drivers = [
            x.driver for x in backends.values()
            if x.driver and x.driver.is_active()
        ]
        # Check if we're in a distributed environment where CUDA_VISIBLE_DEVICES
        # might be temporarily empty (e.g., Ray sets it to "" during actor init)
        cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
        is_distributed_env = (cuda_visible_devices is not None
                              and len(cuda_visible_devices.strip()) == 0)
        # Apply lenient driver check for distributed environments
        if is_distributed_env and len(active_drivers) == 0:
            # Allow 0 drivers in distributed environments - they may become
            # active later when CUDA context is properly initialized
            logger.debug(
                "Triton found 0 active drivers in distributed environment. "
                "This is expected during initialization.")
        elif not is_distributed_env and len(active_drivers) != 1:
            # Strict check for non-distributed environments
            logger.info(
                "Triton is installed but %d active driver(s) found "
                "(expected 1). Disabling Triton to prevent runtime errors.",
                len(active_drivers))
            HAS_TRITON = False
    except ImportError:
        # This can occur if Triton is partially installed or triton.backends
        # is missing.
        logger.warning(
            "Triton is installed, but `triton.backends` could not be imported. "
            "Disabling Triton.")
        HAS_TRITON = False
    except Exception as e:
        # Catch any other unexpected errors during the check.
        # Broad catch is deliberate: this runs at import time and must
        # never crash the importing process.
        logger.warning(
            "An unexpected error occurred while checking Triton active drivers:"
            " %s. Disabling Triton.", e)
        HAS_TRITON = False

if not HAS_TRITON:
    logger.info("Triton not installed or not compatible; certain GPU-related"
                " functions will not be available.")
class TritonPlaceholder(types.ModuleType):
    """Drop-in stand-in for the ``triton`` module when Triton is absent.

    The decorator entry points (``jit``, ``autotune``, ``heuristics``,
    ``Config``) all degrade to no-ops that hand back the decorated
    function unchanged, and ``language`` is a placeholder submodule.
    """

    def __init__(self):
        super().__init__("triton")
        self.__version__ = "3.3.0"
        # Each decorator-style attribute becomes an identity wrapper.
        for attr_name in ("jit", "autotune", "heuristics", "Config"):
            setattr(self, attr_name, self._dummy_decorator(attr_name))
        self.language = TritonLanguagePlaceholder()

    def _dummy_decorator(self, name):
        """Return a no-op decorator usable as ``@deco`` or ``@deco(...)``."""
        def decorator(*args, **kwargs):
            # Bare form (@deco): the target function arrives directly.
            if args and callable(args[0]):
                return args[0]
            # Parameterized form (@deco(...)): hand back an identity wrapper.
            return lambda fn: fn
        return decorator
class TritonLanguagePlaceholder(types.ModuleType):
    """Stand-in for ``triton.language`` when Triton is absent.

    The handful of ``tl`` names referenced at import time are present but
    bound to ``None`` so module-level annotations do not raise.
    """

    def __init__(self):
        super().__init__("triton.language")
        for attr_name in ("constexpr", "dtype", "int64"):
            setattr(self, attr_name, None)