277 lines
10 KiB
Python
277 lines
10 KiB
Python
|
|
# SPDX-License-Identifier: Apache-2.0
|
||
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
|
||
|
|
import importlib.util
|
||
|
|
import io
|
||
|
|
import logging
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
import subprocess
|
||
|
|
import sys
|
||
|
|
|
||
|
|
from sysconfig import get_paths
|
||
|
|
from typing import List, Dict
|
||
|
|
from setuptools import Extension
|
||
|
|
from setuptools import find_namespace_packages, setup
|
||
|
|
from setuptools.command.build_ext import build_ext
|
||
|
|
from setuptools.command.install import install
|
||
|
|
from setuptools.command.develop import develop
|
||
|
|
|
||
|
|
# Absolute directory containing this setup script. ``__file__`` can be a
# relative path (e.g. plain "setup.py"), in which case ``os.path.dirname``
# alone would yield "" and every path derived from ROOT_DIR would silently
# depend on the current working directory of whichever process consumes it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# Module-level logger for build-time messages.
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
def check_or_set_default_env(cmake_args, env_name, env_variable, default_path=""):
    """Add a ``-D<env_name>=<value>`` definition to a CMake argument list.

    Args:
        cmake_args: Argument list to extend. It is extended in place, so
            callers may ignore the return value.
        env_name: Name of the CMake variable to define.
        env_variable: Value to use; ``None`` means "not provided".
        default_path: Fallback value used when ``env_variable`` is ``None``.

    Returns:
        The same ``cmake_args`` object, with the new definition appended.
    """
    use_default = env_variable is None
    if use_default:
        logging.warning(f"Set default {env_name}: {default_path}")
    else:
        logging.info(f"Found existing {env_name}: {env_variable}")

    resolved_value = default_path if use_default else env_variable
    # Augmented assignment keeps the mutation in place for list arguments.
    cmake_args += [f"-D{env_name}={resolved_value}"]
    return cmake_args
|
||
|
|
|
||
|
|
def load_module_from_path(module_name, path):
    """Import the Python source file at ``path`` under the name ``module_name``.

    The module is registered in ``sys.modules`` before its code is executed
    and is then returned to the caller.

    Args:
        module_name: Name under which the module is registered.
        path: Filesystem location of the source file to load.

    Returns:
        The executed module object.
    """
    module_spec = importlib.util.spec_from_file_location(module_name, path)
    loaded = importlib.util.module_from_spec(module_spec)
    sys.modules[module_name] = loaded
    module_spec.loader.exec_module(loaded)
    return loaded
|
||
|
|
|
||
|
|
# Load <ROOT_DIR>/vllm_mlu/envs.py directly from its file path and register it
# as the top-level module "envs". The build code below reads its settings
# (MAX_JOBS, CMAKE_BUILD_TYPE, COMPILE_CUSTOM_KERNELS, ...) from this object.
# NOTE(review): presumably loaded by path so the build does not have to import
# the full vllm_mlu package -- confirm.
envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_mlu", "envs.py"))
|
||
|
|
class CMakeExtension(Extension):
    """Extension placeholder whose binary is produced by CMake.

    The extension is registered with an empty source list and with
    ``py_limited_api`` disabled; the absolute directory given by
    ``cmake_lists_dir`` is stored on the instance.
    """

    def __init__(self, name: str, cmake_lists_dir: str = ".", **kwargs) -> None:
        """Create the extension.

        Args:
            name: Dotted name of the extension module.
            cmake_lists_dir: Directory path, stored as an absolute path in
                ``self.cmake_lists_dir``.
            **kwargs: Forwarded unchanged to ``Extension.__init__``.
        """
        super().__init__(name, sources=[], py_limited_api=False, **kwargs)
        absolute_dir = os.path.abspath(cmake_lists_dir)
        self.cmake_lists_dir = absolute_dir
|
||
|
|
|
||
|
|
|
||
|
|
def get_path(*filepath) -> str:
    """Join the given path components onto ``ROOT_DIR`` and return the result."""
    components = (ROOT_DIR, *filepath)
    return os.path.join(*components)
|
||
|
|
|
||
|
|
|
||
|
|
def get_vllm_version() -> str:
    """Return the package version recorded in ``tools/build.property``.

    The file is searched for entries of the form
    ``VLLM_VERSION=<vllm>+mlu<vllm_mlu>.pt<pytorch>``. When several entries
    match, the last one in the file is used.

    Returns:
        The version string ``"<vllm>+mlu<vllm_mlu>.pt<pytorch>"``.

    Raises:
        OSError: If ``tools/build.property`` cannot be opened.
        RuntimeError: If the file contains no matching ``VLLM_VERSION`` entry.
    """
    with open(get_path("tools/build.property"), "r", encoding="utf-8") as file:
        content = file.read()

    # ``[\d.]`` matches digits and dots only. The former ``[\d|\.]`` also
    # accepted a literal "|" because "|" is not an alternation operator inside
    # a character class.
    results = re.findall(r'VLLM_VERSION=([\d.]+)\+mlu([\d.]+)\.pt(\d+)', content)

    # Explicit raise instead of ``assert``: assertions are stripped when Python
    # runs with -O, which would turn a missing version into an IndexError.
    if not results:
        raise RuntimeError("fail to get vllm, vllm_mlu and pytorch version.")

    vllm_version, mlu_version, torch_version = results[-1]
    return f"{vllm_version}+mlu{mlu_version}.pt{torch_version}"
|
||
|
|
|
||
|
|
|
||
|
|
def read_readme() -> str:
    """Return the text of ``README.md``, or ``""`` when the file is absent.

    The file is read as UTF-8 inside a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    readme_path = get_path("README.md")
    if not os.path.isfile(readme_path):
        return ""
    with open(readme_path, "r", encoding="utf-8") as readme:
        return readme.read()
|
||
|
|
|
||
|
|
|
||
|
|
def get_requirements() -> List[str]:
    """Get Python package dependencies from requirements.txt.

    ``-r <file>`` lines are followed recursively (paths are resolved with
    ``get_path``). Other pip options (lines starting with ``--``), blank lines
    and ``#`` comment lines are skipped. Every remaining line is returned with
    surrounding whitespace removed.

    Returns:
        The flattened list of requirement specifiers.

    Raises:
        OSError: If ``requirements.txt`` or an included file cannot be opened.
    """

    def _read_requirements(filename: str) -> List[str]:
        with open(get_path(filename), encoding="utf-8") as f:
            # splitlines() copes with both "\n" and "\r\n" line endings.
            raw_lines = f.read().splitlines()

        resolved_requirements: List[str] = []
        for raw_line in raw_lines:
            line = raw_line.strip()
            # Blank lines and comments are not requirements; an empty file
            # must yield an empty list rather than [""].
            if not line or line.startswith("#"):
                continue
            if line.startswith("-r "):
                resolved_requirements += _read_requirements(line.split()[1])
            elif line.startswith("--"):
                continue
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    return _read_requirements("requirements.txt")
|
||
|
|
|
||
|
|
|
||
|
|
class cmake_build_ext(build_ext):
    """``build_ext`` command that configures, builds and installs via CMake.

    For every registered extension the command runs a CMake configure step in
    ``self.build_temp``, then performs one ``cmake --build`` for all targets
    followed by ``cmake --install``.
    """

    # A dict of extension directories that have been configured.
    did_config: Dict[str, bool] = {}

    # Determine number of compilation jobs
    def compute_num_jobs(self):
        """Return the number of parallel build jobs (never less than 1).

        ``envs.MAX_JOBS`` is used when it is set; otherwise the number of CPUs
        available to this process is used.
        """
        # `num_jobs` is either the value of the MAX_JOBS environment variable
        # (if defined) or the number of CPUs available.
        num_jobs = envs.MAX_JOBS
        if num_jobs is not None:
            num_jobs = int(num_jobs)
            logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
        else:
            try:
                # os.sched_getaffinity() isn't universally available, so fall
                # back to os.cpu_count() if we get an error here.
                num_jobs = len(os.sched_getaffinity(0))
            except AttributeError:
                # os.cpu_count() returns None when the count cannot be
                # determined; fall back to a single job in that case.
                num_jobs = os.cpu_count() or 1
        return max(1, num_jobs)

    #
    # Perform cmake configuration for a single extension.
    #
    def configure(self, ext: CMakeExtension) -> None:
        """Run the CMake configure step inside ``self.build_temp``.

        Args:
            ext: The extension being configured. It is not consulted here;
                the CMake source directory is always ``ROOT_DIR``.

        Raises:
            RuntimeError: If installing/locating pybind11 fails or the CMake
                configure command exits with a non-zero status.
        """
        os.makedirs(self.build_temp, exist_ok=True)
        source_dir = os.path.abspath(ROOT_DIR)
        python_executable = sys.executable
        cmake_args = ["cmake"]

        # Default use release mode to compile the csrc code
        # Turbo now support compiled with Release, Debug and RelWithDebugInfo
        # NOTE(review): CMake's stock configuration is spelled "RelWithDebInfo";
        # confirm that "RelWithDebugInfo" is what the project's CMake files expect.
        if envs.CMAKE_BUILD_TYPE is None or envs.CMAKE_BUILD_TYPE not in [
                "Debug",
                "Release",
                "RelWithDebugInfo",
        ]:
            envs.CMAKE_BUILD_TYPE = "Release"
        cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"]

        # Default dump the compile commands for lsp
        cmake_args += ["-DCMAKE_EXPORT_COMPILE_COMMANDS=1"]
        if envs.CXX_COMPILER is not None:
            cmake_args += [f"-DCMAKE_CXX_COMPILER={envs.CXX_COMPILER}"]
        if envs.C_COMPILER is not None:
            cmake_args += [f"-DCMAKE_C_COMPILER={envs.C_COMPILER}"]
        if envs.VERBOSE:
            cmake_args += ["-DCMAKE_VERBOSE_MAKEFILE=ON"]

        # find PYTHON_EXECUTABLE (the helper extends cmake_args in place)
        check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE",
                                 python_executable)

        # find PYTHON_INCLUDE_PATH
        check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH",
                                 get_paths()["include"])

        try:
            # Install the pinned pybind11 release with pip, then ask the
            # package where its CMake config files live.
            subprocess.check_call(
                [python_executable, "-m", "pip", "install", "pybind11==2.13.6"])
            pybind11_cmake_path = (subprocess.check_output(
                [python_executable, "-m", "pybind11",
                 "--cmake"]).decode().strip())
        except subprocess.CalledProcessError as e:
            # Reported separately from the CMake step below so the message
            # points at the command that actually failed.
            raise RuntimeError(
                f"Failed to install or locate pybind11: {e}") from e

        install_path = os.path.join(ROOT_DIR, self.build_lib)
        # NOTE(review): Distribution.get_command_obj() creates the command
        # object when it does not exist yet, so this check is presumably true
        # for every build, not only for `develop` -- confirm intent.
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            install_path = os.path.join(ROOT_DIR, "vllm_mlu")

        # add CMAKE_INSTALL_PATH
        cmake_args += [f"-DCMAKE_INSTALL_PREFIX={install_path}"]

        cmake_args += [f"-DCMAKE_PREFIX_PATH={pybind11_cmake_path}"]

        cmake_args += [source_dir]
        logger.info("cmake config command: %s", cmake_args)
        try:
            subprocess.check_call(cmake_args, cwd=self.build_temp)
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"CMake configuration failed: {e}") from e

    def build_extensions(self) -> None:
        """Configure, build and install every registered extension.

        Does nothing when ``envs.COMPILE_CUSTOM_KERNELS`` is falsy.

        Raises:
            RuntimeError: If CMake is missing or not working, or if the build
                or install step fails.
        """
        if not envs.COMPILE_CUSTOM_KERNELS:
            return

        # Ensure that CMake is present and working
        try:
            subprocess.check_output(["cmake", "--version"])
        except (OSError, subprocess.CalledProcessError) as e:
            raise RuntimeError(f"Cannot find CMake executable: {e}") from e

        # Create build directory if it does not exist.
        os.makedirs(self.build_temp, exist_ok=True)
        os.makedirs(os.path.join(self.build_lib, "vllm_mlu"), exist_ok=True)

        targets = []

        def get_target_name(s: str) -> str:
            # Same result as str.removeprefix("vllm_mlu."), written out so it
            # also works on Python 3.8, which python_requires still allows.
            prefix = "vllm_mlu."
            return s[len(prefix):] if s.startswith(prefix) else s

        # Build all the extensions
        for ext in self.extensions:
            self.configure(ext)
            targets.append(get_target_name(ext.name))

        num_jobs = self.compute_num_jobs()

        build_args = [
            "--build",
            ".",
            f"-j={num_jobs}",
            *[f"--target={name}" for name in targets],
        ]
        logger.info(build_args)
        try:
            subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
        except (OSError, subprocess.CalledProcessError) as e:
            # check_call signals a non-zero exit with CalledProcessError, which
            # is not an OSError; catch both so build failures are wrapped too.
            raise RuntimeError(f"Build library failed: {e}") from e

        # Install the libraries
        install_args = ["--install", "."]
        try:
            subprocess.check_call(["cmake", *install_args],
                                  cwd=self.build_temp)
        except (OSError, subprocess.CalledProcessError) as e:
            raise RuntimeError(f"Install library failed: {e}") from e

        # copy back to build folder for editable build
        # NOTE(review): see the note in configure() -- this condition is
        # presumably always true; confirm.
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            for root, _, files in os.walk(self.build_temp):
                for file in files:
                    if file.endswith(".so"):
                        src_path = os.path.join(root, file)
                        dst_path = os.path.join(self.build_lib, "vllm_mlu",
                                                file)
                        self.copy_file(src_path, dst_path)
                        logger.info(f"Copy: {src_path} -> {dst_path}")

    def run(self):
        """Run the standard ``build_ext`` flow, which calls ``build_extensions``."""
        # First, run the standard build_ext command to compile the extensions
        super().run()
|
||
|
|
|
||
|
|
|
||
|
|
class custom_install(install):
    """``install`` command that runs ``build_ext`` before installing."""

    def run(self):
        """Run the ``build_ext`` command, then the regular install step."""
        self.run_command("build_ext")
        super().run()
|
||
|
|
|
||
|
|
# The native extension is registered only when custom kernel compilation is
# enabled in envs; otherwise no extension modules are declared.
ext_modules = (
    [CMakeExtension(name="vllm_mlu.vllm_mlu_C")]
    if envs.COMPILE_CUSTOM_KERNELS
    else []
)

# Route the build_ext and install commands through the CMake-aware classes.
cmdclass = {
    "build_ext": cmake_build_ext,
    "install": custom_install,
}
|
||
|
|
|
||
|
|
# Package metadata and build configuration.
setup(
    name="vllm_mlu",
    # Version is read from tools/build.property.
    version=get_vllm_version(),
    author="Cambricon vLLM Team",
    license="Apache 2.0",
    description=(
        "A high-throughput and memory-efficient inference and "
        "serving engine for LLMs on MLU backend"
    ),
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    url="",
    project_urls={
        "Homepage": "https://github.com/vllm-project/vllm",
        "Documentation": "https://vllm.readthedocs.io/en/latest/",
    },
    classifiers=[
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "License :: OSI Approved :: Apache Software License",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    packages=find_namespace_packages(
        exclude=("docs", "examples", "tests*", "csrc"),
    ),
    include_package_data=True,
    python_requires=">=3.8",
    install_requires=get_requirements(),
    ext_modules=ext_modules,
    cmdclass=cmdclass,
    # Entry points declared under the vllm.platform_plugins and
    # vllm.general_plugins groups.
    entry_points={
        'vllm.platform_plugins': ["mlu = vllm_mlu:register_mlu_platform"],
        'vllm.general_plugins': ["mlu_hijack = vllm_mlu:register_mlu_hijack"],
    },
)
|