forked from EngineX-Hygon/enginex-hygon-vllm
init src 0.9.2
This commit is contained in:
14
vllm/triton_utils/__init__.py
Normal file
14
vllm/triton_utils/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from vllm.triton_utils.importing import (HAS_TRITON, TritonLanguagePlaceholder,
|
||||
TritonPlaceholder)
|
||||
|
||||
# Bind either the real Triton modules or inert placeholders, so downstream
# code can unconditionally do `from vllm.triton_utils import triton, tl`
# without guarding every import site on HAS_TRITON.
if HAS_TRITON:
    import triton
    import triton.language as tl
else:
    # Placeholders mimic the module interface; their decorators (jit,
    # autotune, ...) are no-ops, which keeps kernel-defining modules
    # importable on systems without Triton.
    triton = TritonPlaceholder()
    tl = TritonLanguagePlaceholder()

__all__ = ["HAS_TRITON", "triton", "tl"]
|
||||
55
vllm/triton_utils/custom_cache_manager.py
Normal file
55
vllm/triton_utils/custom_cache_manager.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
from triton.runtime.cache import (FileCacheManager, default_cache_dir,
|
||||
default_dump_dir, default_override_dir)
|
||||
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def maybe_set_triton_cache_manager() -> None:
    """Point Triton at vLLM's per-process cache manager.

    Sets the ``TRITON_CACHE_MANAGER`` environment variable to
    :class:`CustomCacheManager` unless the user (or another component)
    has already configured a cache manager, in which case the existing
    setting is respected.
    """
    # Fixed typo: local was previously misspelled as `cache_manger`.
    cache_manager = os.environ.get("TRITON_CACHE_MANAGER", None)
    if cache_manager is None:
        manager = "vllm.triton_utils.custom_cache_manager:CustomCacheManager"
        logger.info("Setting Triton cache manager to: %s", manager)
        os.environ["TRITON_CACHE_MANAGER"] = manager
|
||||
|
||||
|
||||
class CustomCacheManager(FileCacheManager):
    """Re-implements Triton's cache manager, ensuring that a
    unique cache directory is created for each process. This is
    needed to avoid collisions when running with tp>1 and
    using multi-processing as the distributed backend.

    Note this issue was fixed by triton-lang/triton/pull/4295,
    but the fix is not yet included in triton==v3.0.0. However,
    it should be included in the subsequent version.

    NOTE(review): the per-process pid suffix on the cache dir is
    currently commented out below, so the directory is only unique
    per cache *key*, not per process — confirm whether the class
    docstring above still matches the intended behavior.
    """

    # NOTE: deliberately does NOT call FileCacheManager.__init__;
    # it re-derives the cache/dump/override directories itself so the
    # directory layout can differ from upstream Triton's.
    def __init__(self, key, override=False, dump=False):
        # `key` is the Triton-provided cache key for a compiled kernel;
        # it becomes the leaf directory name in every branch below.
        self.key = key
        self.lock_path = None
        if dump:
            # Dump mode: kernels are written under the dump dir and the
            # directory is created eagerly.
            self.cache_dir = default_dump_dir()
            self.cache_dir = os.path.join(self.cache_dir, self.key)
            self.lock_path = os.path.join(self.cache_dir, "lock")
            os.makedirs(self.cache_dir, exist_ok=True)
        elif override:
            # Override mode: no lock path and no eager mkdir — mirrors
            # upstream FileCacheManager's handling of override dirs.
            self.cache_dir = default_override_dir()
            self.cache_dir = os.path.join(self.cache_dir, self.key)
        else:
            # create cache directory if it doesn't exist
            # TRITON_CACHE_DIR wins if set and non-blank; otherwise fall
            # back to Triton's default location.
            self.cache_dir = os.getenv("TRITON_CACHE_DIR",
                                       "").strip() or default_cache_dir()
            if self.cache_dir:
                # self.cache_dir = f"{self.cache_dir}_{os.getpid()}"
                self.cache_dir = os.path.join(self.cache_dir, self.key)
                self.lock_path = os.path.join(self.cache_dir, "lock")
                os.makedirs(self.cache_dir, exist_ok=True)
            else:
                raise RuntimeError("Could not create or locate cache dir")
|
||||
94
vllm/triton_utils/importing.py
Normal file
94
vllm/triton_utils/importing.py
Normal file
@@ -0,0 +1,94 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import os
|
||||
import types
|
||||
from importlib.util import find_spec
|
||||
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# Import-time probe: decide once, at module import, whether Triton is both
# installed and usable. Downstream modules gate kernel imports on HAS_TRITON.
HAS_TRITON = (
    find_spec("triton") is not None
    # NOTE(review): "pytorch-triton-xpu" is a pip distribution name
    # (hyphenated), not an importable module name, so find_spec will in
    # practice return None here — confirm the intended module name.
    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
)
if HAS_TRITON:
    try:
        from triton.backends import backends

        # It's generally expected that x.driver exists and has
        # an is_active method.
        # The `x.driver and` check adds a small layer of safety.
        active_drivers = [
            x.driver for x in backends.values()
            if x.driver and x.driver.is_active()
        ]

        # Check if we're in a distributed environment where CUDA_VISIBLE_DEVICES
        # might be temporarily empty (e.g., Ray sets it to "" during actor init)
        cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
        is_distributed_env = (cuda_visible_devices is not None
                              and len(cuda_visible_devices.strip()) == 0)

        # Apply lenient driver check for distributed environments
        if is_distributed_env and len(active_drivers) == 0:
            # Allow 0 drivers in distributed environments - they may become
            # active later when CUDA context is properly initialized
            logger.debug(
                "Triton found 0 active drivers in distributed environment. "
                "This is expected during initialization.")
        elif not is_distributed_env and len(active_drivers) != 1:
            # Strict check for non-distributed environments: exactly one
            # active driver is required, otherwise Triton is disabled.
            logger.info(
                "Triton is installed but %d active driver(s) found "
                "(expected 1). Disabling Triton to prevent runtime errors.",
                len(active_drivers))
            HAS_TRITON = False
    except ImportError:
        # This can occur if Triton is partially installed or triton.backends
        # is missing.
        logger.warning(
            "Triton is installed, but `triton.backends` could not be imported. "
            "Disabling Triton.")
        HAS_TRITON = False
    except Exception as e:
        # Catch any other unexpected errors during the check.
        # Broad catch is deliberate: a broken Triton install must never
        # prevent vLLM itself from importing.
        logger.warning(
            "An unexpected error occurred while checking Triton active drivers:"
            " %s. Disabling Triton.", e)
        HAS_TRITON = False

if not HAS_TRITON:
    logger.info("Triton not installed or not compatible; certain GPU-related"
                " functions will not be available.")
|
||||
|
||||
|
||||
class TritonPlaceholder(types.ModuleType):
    """Stand-in for the ``triton`` module when Triton is unavailable.

    Exposes the decorators vLLM applies at import time (``jit``,
    ``autotune``, ``heuristics``, ``Config``) as no-ops, so modules that
    define Triton kernels remain importable without Triton installed.
    """

    def __init__(self):
        super().__init__("triton")
        self.__version__ = "3.3.0"
        # Every decorator attribute shares the same no-op implementation.
        for attr_name in ("jit", "autotune", "heuristics", "Config"):
            setattr(self, attr_name, self._dummy_decorator(attr_name))
        self.language = TritonLanguagePlaceholder()

    def _dummy_decorator(self, name):
        """Return a decorator that leaves the decorated object untouched.

        Handles both bare usage (``@triton.jit`` — called with the
        function itself) and parameterized usage (``@triton.jit(...)`` —
        called with config args, must return an identity wrapper).
        """

        def decorator(*args, **kwargs):
            if args and callable(args[0]):
                # Bare form: the decorated callable is the first argument.
                return args[0]
            # Parameterized form: hand back an identity decorator.
            return lambda f: f

        return decorator
|
||||
|
||||
|
||||
class TritonLanguagePlaceholder(types.ModuleType):
    """Stand-in for ``triton.language`` when Triton is not installed.

    Names that kernel modules reference at import time (e.g. in
    ``x: tl.constexpr`` annotations) are defined as ``None`` so that
    merely importing those modules does not raise AttributeError.
    """

    def __init__(self):
        super().__init__("triton.language")
        # Real triton.language exposes these as types; None is sufficient
        # because placeholder kernels are never actually compiled or run.
        self.constexpr = None
        self.dtype = None
        self.int64 = None
||||
Reference in New Issue
Block a user