Sync from v0.13
This commit adds two new files:
  vllm/triton_utils/__init__.py (20 lines, new file)
  vllm/triton_utils/importing.py (103 lines, new file)
@@ -0,0 +1,20 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from vllm.triton_utils.importing import (
|
||||
HAS_TRITON,
|
||||
TritonLanguagePlaceholder,
|
||||
TritonPlaceholder,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING or HAS_TRITON:
|
||||
import triton
|
||||
import triton.language as tl
|
||||
import triton.language.extra.libdevice as tldevice
|
||||
else:
|
||||
triton = TritonPlaceholder()
|
||||
tl = TritonLanguagePlaceholder()
|
||||
tldevice = TritonLanguagePlaceholder()
|
||||
|
||||
__all__ = ["HAS_TRITON", "triton", "tl", "tldevice"]
|
||||
103
vllm/triton_utils/importing.py
Normal file
103
vllm/triton_utils/importing.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import os
|
||||
import types
|
||||
from importlib.util import find_spec
|
||||
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
HAS_TRITON = (
|
||||
find_spec("triton") is not None
|
||||
or find_spec("pytorch-triton-xpu") is not None # Not compatible
|
||||
)
|
||||
if HAS_TRITON:
|
||||
try:
|
||||
from triton.backends import backends
|
||||
|
||||
# It's generally expected that x.driver exists and has
|
||||
# an is_active method.
|
||||
# The `x.driver and` check adds a small layer of safety.
|
||||
active_drivers = [
|
||||
x.driver for x in backends.values() if x.driver and x.driver.is_active()
|
||||
]
|
||||
|
||||
# Check if we're in a distributed environment where CUDA_VISIBLE_DEVICES
|
||||
# might be temporarily empty (e.g., Ray sets it to "" during actor init)
|
||||
cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
|
||||
is_distributed_env = (
|
||||
cuda_visible_devices is not None and len(cuda_visible_devices.strip()) == 0
|
||||
)
|
||||
|
||||
# Apply lenient driver check for distributed environments
|
||||
if is_distributed_env and len(active_drivers) == 0:
|
||||
# Allow 0 drivers in distributed environments - they may become
|
||||
# active later when CUDA context is properly initialized
|
||||
logger.debug(
|
||||
"Triton found 0 active drivers in distributed environment. "
|
||||
"This is expected during initialization."
|
||||
)
|
||||
elif not is_distributed_env and len(active_drivers) != 1:
|
||||
# Strict check for non-distributed environments
|
||||
logger.info(
|
||||
"Triton is installed but %d active driver(s) found "
|
||||
"(expected 1). Disabling Triton to prevent runtime errors.",
|
||||
len(active_drivers),
|
||||
)
|
||||
HAS_TRITON = False
|
||||
except ImportError:
|
||||
# This can occur if Triton is partially installed or triton.backends
|
||||
# is missing.
|
||||
logger.warning(
|
||||
"Triton is installed, but `triton.backends` could not be imported. "
|
||||
"Disabling Triton."
|
||||
)
|
||||
HAS_TRITON = False
|
||||
except Exception as e:
|
||||
# Catch any other unexpected errors during the check.
|
||||
logger.warning(
|
||||
"An unexpected error occurred while checking Triton active drivers:"
|
||||
" %s. Disabling Triton.",
|
||||
e,
|
||||
)
|
||||
HAS_TRITON = False
|
||||
|
||||
if not HAS_TRITON:
|
||||
logger.info(
|
||||
"Triton not installed or not compatible; certain GPU-related"
|
||||
" functions will not be available."
|
||||
)
|
||||
|
||||
|
||||
class TritonPlaceholder(types.ModuleType):
|
||||
def __init__(self):
|
||||
super().__init__("triton")
|
||||
self.__version__ = "3.4.0"
|
||||
self.jit = self._dummy_decorator("jit")
|
||||
self.autotune = self._dummy_decorator("autotune")
|
||||
self.heuristics = self._dummy_decorator("heuristics")
|
||||
self.Config = self._dummy_decorator("Config")
|
||||
self.language = TritonLanguagePlaceholder()
|
||||
|
||||
def _dummy_decorator(self, name):
|
||||
def decorator(*args, **kwargs):
|
||||
if args and callable(args[0]):
|
||||
return args[0]
|
||||
return lambda f: f
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
class TritonLanguagePlaceholder(types.ModuleType):
|
||||
def __init__(self):
|
||||
super().__init__("triton.language")
|
||||
self.constexpr = None
|
||||
self.dtype = None
|
||||
self.int64 = None
|
||||
self.int32 = None
|
||||
self.tensor = None
|
||||
self.exp = None
|
||||
self.log = None
|
||||
self.log2 = None
|
||||
Reference in New Issue
Block a user