[Model] Support DeepSeek-V4
This commit is contained in:
276
setup.py
Normal file
276
setup.py
Normal file
@@ -0,0 +1,276 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
|
||||
import importlib.util
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from sysconfig import get_paths
|
||||
from typing import List, Dict
|
||||
from setuptools import Extension
|
||||
from setuptools import find_namespace_packages, setup
|
||||
from setuptools.command.build_ext import build_ext
|
||||
from setuptools.command.install import install
|
||||
from setuptools.command.develop import develop
|
||||
|
||||
# Absolute directory containing this setup script. ``__file__`` is resolved
# first because ``os.path.dirname("setup.py")`` is the empty string, which
# would make every path derived from ROOT_DIR depend on the current working
# directory of whichever process later consumes it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Module-level logger used by the build commands defined in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
def check_or_set_default_env(cmake_args, env_name, env_variable, default_path=""):
    """Append a ``-D<env_name>=<value>`` definition to ``cmake_args``.

    Args:
        cmake_args: List of cmake command-line arguments; extended in place.
        env_name: Name of the cmake variable to define.
        env_variable: Value to use, or ``None`` to fall back to
            ``default_path``.
        default_path: Value used when ``env_variable`` is ``None``.

    Returns:
        The same ``cmake_args`` object, with the new definition appended.
    """
    if env_variable is not None:
        logging.info(f"Found existing {env_name}: {env_variable}")
    else:
        logging.warning(f"Set default {env_name}: {default_path}")
        env_variable = default_path

    definition = f"-D{env_name}={env_variable}"
    # ``+=`` extends the caller's list in place; callers in this file rely on
    # that and ignore the return value.
    cmake_args += [definition]
    return cmake_args
|
||||
|
||||
def load_module_from_path(module_name, path):
    """Load the Python source file at ``path`` as module ``module_name``.

    The new module object is stored in ``sys.modules`` under ``module_name``
    before its code is executed.

    Args:
        module_name: Name to give the module and to register it under.
        path: Filesystem path of the Python source file.

    Returns:
        The executed module object.
    """
    module_spec = importlib.util.spec_from_file_location(module_name, path)
    loaded = importlib.util.module_from_spec(module_spec)
    sys.modules[module_name] = loaded
    module_spec.loader.exec_module(loaded)
    return loaded
|
||||
|
||||
# Load vllm_mlu/envs.py straight from its file path and register it as the
# top-level module "envs"; the build commands below read their settings
# (MAX_JOBS, CMAKE_BUILD_TYPE, COMPILE_CUSTOM_KERNELS, ...) from it.
# NOTE(review): presumably loaded by path so the vllm_mlu package itself does
# not have to be importable at build time - confirm.
envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_mlu", "envs.py"))
|
||||
class CMakeExtension(Extension):
    """Extension entry that carries no sources of its own.

    It only records the extension name and the absolute directory given by
    ``cmake_lists_dir``.
    """

    def __init__(self, name: str, cmake_lists_dir: str = ".", **kwargs) -> None:
        """Create the extension.

        Args:
            name: Dotted name of the extension module.
            cmake_lists_dir: Directory stored (as an absolute path) on the
                instance as ``cmake_lists_dir``.
            **kwargs: Forwarded unchanged to ``Extension.__init__``.
        """
        # The source list is deliberately empty and the limited API disabled.
        super().__init__(name, sources=[], py_limited_api=False, **kwargs)
        absolute_dir = os.path.abspath(cmake_lists_dir)
        self.cmake_lists_dir = absolute_dir
|
||||
|
||||
|
||||
def get_path(*filepath) -> str:
    """Return ``filepath`` components joined onto ``ROOT_DIR``."""
    components = (ROOT_DIR, *filepath)
    return os.path.join(*components)
|
||||
|
||||
|
||||
def get_vllm_version() -> str:
    """Return the package version recorded in ``tools/build.property``.

    The file is searched for entries shaped like
    ``VLLM_VERSION=<vllm>+mlu<vllm_mlu>.pt<pytorch>``. When several entries
    match, the last one is used.

    Returns:
        The version string ``"<vllm>+mlu<vllm_mlu>.pt<pytorch>"``.

    Raises:
        RuntimeError: If the file contains no matching ``VLLM_VERSION`` entry.
    """
    with open(get_path("tools/build.property"), "r", encoding="utf-8") as file:
        content = file.read()

    # Inside a character class ``|`` is a literal pipe, not alternation, so
    # the version components are matched with ``[\d.]`` (digits and dots).
    matches = re.findall(r"VLLM_VERSION=([\d.]+)\+mlu([\d.]+)\.pt(\d+)", content)

    # Raise explicitly instead of asserting: assertions are removed when
    # Python runs with -O, which would turn a missing entry into an
    # IndexError on the lookup below.
    if not matches:
        raise RuntimeError("fail to get vllm, vllm_mlu and pytorch version.")

    vllm_version, mlu_version, torch_version = matches[-1]
    return f"{vllm_version}+mlu{mlu_version}.pt{torch_version}"
|
||||
|
||||
|
||||
def read_readme() -> str:
    """Read the README file if present.

    Returns:
        The UTF-8 decoded contents of ``README.md`` under the project root,
        or an empty string when that file does not exist.
    """
    readme_path = get_path("README.md")
    if not os.path.isfile(readme_path):
        return ""
    # The context manager closes the handle deterministically; the previous
    # ``io.open(...).read()`` left it open until garbage collection.
    with open(readme_path, "r", encoding="utf-8") as readme:
        return readme.read()
|
||||
|
||||
|
||||
def get_requirements() -> List[str]:
    """Get Python package dependencies from requirements.txt.

    ``-r <file>`` lines are followed recursively (paths are resolved against
    the project root). Other option lines starting with ``--``, blank lines
    and ``#`` comment lines are skipped.

    Returns:
        The requirement specifiers in file order.
    """

    def _read_requirements(filename: str) -> List[str]:
        """Return the requirements listed in ``filename``, resolving ``-r``."""
        with open(get_path(filename), encoding="utf-8") as f:
            raw_lines = f.read().splitlines()

        resolved_requirements: List[str] = []
        for raw_line in raw_lines:
            line = raw_line.strip()
            # Blank and comment lines are not requirement specifiers; passing
            # them on (e.g. [""] for an empty file) would hand setuptools
            # entries it cannot parse.
            if not line or line.startswith("#"):
                continue
            if line.startswith("-r "):
                resolved_requirements += _read_requirements(line.split()[1])
            elif line.startswith("--"):
                continue
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    return _read_requirements("requirements.txt")
|
||||
|
||||
|
||||
class cmake_build_ext(build_ext):
    """``build_ext`` command that drives the extension build through CMake.

    ``build_extensions`` configures every extension with ``cmake``, builds
    the corresponding targets, runs ``cmake --install`` and finally copies
    the produced ``.so`` files into ``<build_lib>/vllm_mlu``.
    """

    # A dict of extension directories that have been configured.
    # NOTE(review): nothing in this class reads or updates it.
    did_config: Dict[str, bool] = {}

    def compute_num_jobs(self):
        """Determine the number of parallel compilation jobs.

        Returns:
            ``envs.MAX_JOBS`` converted to ``int`` when it is set, otherwise
            the number of CPUs available to this process; never less than 1.
        """
        num_jobs = envs.MAX_JOBS
        if num_jobs is not None:
            num_jobs = int(num_jobs)
            logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
        else:
            try:
                # os.sched_getaffinity() isn't universally available, so fall
                # back to os.cpu_count() if we get an error here.
                num_jobs = len(os.sched_getaffinity(0))
            except AttributeError:
                # os.cpu_count() returns None when the count cannot be
                # determined; max(1, None) would raise TypeError.
                num_jobs = os.cpu_count() or 1

        return max(1, num_jobs)

    def configure(self, ext: CMakeExtension) -> None:
        """Perform the cmake configuration step inside ``self.build_temp``.

        Args:
            ext: The extension being configured. NOTE(review): it is not
                consulted here; the same configuration is produced for every
                extension.

        Raises:
            RuntimeError: If pybind11 cannot be installed or located, or if
                the cmake configure command fails.
        """
        os.makedirs(self.build_temp, exist_ok=True)
        source_dir = os.path.abspath(ROOT_DIR)
        python_executable = sys.executable
        cmake_args = ["cmake"]

        # Default use release mode to compile the csrc code.
        # Any unset or unrecognised value falls back to "Release".
        # NOTE(review): CMake's own configuration is spelled "RelWithDebInfo";
        # confirm that "RelWithDebugInfo" is what the CMake project expects.
        if envs.CMAKE_BUILD_TYPE not in ("Debug", "Release", "RelWithDebugInfo"):
            envs.CMAKE_BUILD_TYPE = "Release"
        cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"]

        # Default dump the compile commands for lsp
        cmake_args += ["-DCMAKE_EXPORT_COMPILE_COMMANDS=1"]
        if envs.CXX_COMPILER is not None:
            cmake_args += [f"-DCMAKE_CXX_COMPILER={envs.CXX_COMPILER}"]
        if envs.C_COMPILER is not None:
            cmake_args += [f"-DCMAKE_C_COMPILER={envs.C_COMPILER}"]
        if envs.VERBOSE:
            cmake_args += ["-DCMAKE_VERBOSE_MAKEFILE=ON"]

        # Both helpers extend cmake_args in place.
        check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE", sys.executable)
        check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH",
                                 get_paths()["include"])

        try:
            # Install the pinned pybind11 with pip, then ask it where its
            # CMake package files live.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", "pybind11==2.13.6"])
            pybind11_cmake_path = subprocess.check_output(
                [python_executable, "-m", "pybind11", "--cmake"]).decode().strip()
        except subprocess.CalledProcessError as e:
            # This failure happens before cmake is invoked, so report it as a
            # pybind11 problem rather than a cmake configuration error.
            raise RuntimeError(
                f"Failed to install or locate pybind11: {e}") from e

        # Paths are built from the absolute source_dir so the prefix does not
        # depend on cmake being run with cwd=self.build_temp.
        install_path = os.path.join(source_dir, self.build_lib)
        # NOTE(review): Distribution.get_command_obj() creates the command
        # object when none is cached, so this check may also succeed outside
        # an actual `develop` run - confirm the intended behaviour.
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            install_path = os.path.join(source_dir, "vllm_mlu")

        cmake_args += [f"-DCMAKE_INSTALL_PREFIX={install_path}"]
        cmake_args += [f"-DCMAKE_PREFIX_PATH={pybind11_cmake_path}"]
        cmake_args += [source_dir]

        logger.info("cmake config command: %s", cmake_args)
        try:
            subprocess.check_call(cmake_args, cwd=self.build_temp)
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"CMake configuration failed: {e}") from e

    def build_extensions(self) -> None:
        """Configure, build and install all extensions with cmake.

        Does nothing when ``envs.COMPILE_CUSTOM_KERNELS`` is falsy.

        Raises:
            RuntimeError: If cmake is missing, or if the configure, build or
                install step fails.
        """
        if not envs.COMPILE_CUSTOM_KERNELS:
            return

        # Ensure that CMake is present and working
        try:
            subprocess.check_output(["cmake", "--version"])
        except (OSError, subprocess.CalledProcessError) as e:
            raise RuntimeError(f"Cannot find CMake executable: {e}") from e

        # Create the build directories if they do not exist.
        os.makedirs(self.build_temp, exist_ok=True)
        os.makedirs(os.path.join(self.build_lib, "vllm_mlu"), exist_ok=True)

        def get_target_name(s: str) -> str:
            # Equivalent to s.removeprefix("vllm_mlu."), written without it
            # because str.removeprefix only exists on Python >= 3.9 while this
            # package declares python_requires ">=3.8".
            prefix = "vllm_mlu."
            return s[len(prefix):] if s.startswith(prefix) else s

        # Configure every extension and collect its cmake target name.
        targets = []
        for ext in self.extensions:
            self.configure(ext)
            targets.append(get_target_name(ext.name))

        num_jobs = self.compute_num_jobs()

        build_args = ["--build", ".", f"-j={num_jobs}",
                      *[f"--target={name}" for name in targets],
                      ]
        logger.info(build_args)
        # check_call signals a failing command with CalledProcessError (not
        # an OSError), so both must be caught for the wrapping to take effect.
        try:
            subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
        except (OSError, subprocess.CalledProcessError) as e:
            raise RuntimeError(f"Build library failed: {e}") from e

        # Install the libraries
        install_args = ["--install", ".", ]
        try:
            subprocess.check_call(["cmake", *install_args], cwd=self.build_temp)
        except (OSError, subprocess.CalledProcessError) as e:
            raise RuntimeError(f"Install library failed: {e}") from e

        # copy back to build folder for editable build
        # NOTE(review): see the get_command_obj() remark in configure(); this
        # branch may run for every build - confirm.
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            for root, _, files in os.walk(self.build_temp):
                for file in files:
                    if file.endswith(".so"):
                        src_path = os.path.join(root, file)
                        dst_path = os.path.join(self.build_lib, "vllm_mlu", file)
                        self.copy_file(src_path, dst_path)
                        logger.info(f"Copy: {src_path} -> {dst_path}")

    def run(self):
        """Run the standard ``build_ext`` flow, which calls
        ``build_extensions``."""
        super().run()
|
||||
|
||||
|
||||
class custom_install(install):
    """``install`` command that runs ``build_ext`` before installing."""

    def run(self):
        """Run the ``build_ext`` command, then the regular install step."""
        self.run_command("build_ext")
        super().run()
|
||||
|
||||
# The CMake-built extension is only registered when custom kernel compilation
# is enabled in envs; otherwise no extension modules are declared.
ext_modules = (
    [CMakeExtension(name="vllm_mlu.vllm_mlu_C")]
    if envs.COMPILE_CUSTOM_KERNELS
    else []
)

# Route the build_ext and install commands to the classes defined above.
cmdclass = {"build_ext": cmake_build_ext, "install": custom_install}
|
||||
|
||||
# Package metadata and build configuration for the vllm_mlu distribution.
setup(
    # Identity and descriptive metadata.
    name="vllm_mlu",
    version=get_vllm_version(),
    author="Cambricon vLLM Team",
    license="Apache 2.0",
    url="",
    description=(
        "A high-throughput and memory-efficient inference and "
        "serving engine for LLMs on MLU backend"
    ),
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    project_urls={
        "Homepage": "https://github.com/vllm-project/vllm",
        "Documentation": "https://vllm.readthedocs.io/en/latest/",
    },
    classifiers=[
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "License :: OSI Approved :: Apache Software License",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    # Package discovery and dependencies.
    packages=find_namespace_packages(
        exclude=("docs", "examples", "tests*", "csrc"),
    ),
    include_package_data=True,
    python_requires=">=3.8",
    install_requires=get_requirements(),
    # Native extension build hooks defined earlier in this file.
    ext_modules=ext_modules,
    cmdclass=cmdclass,
    # Entry-point groups under which this package registers itself.
    entry_points={
        "vllm.platform_plugins": ["mlu = vllm_mlu:register_mlu_platform"],
        "vllm.general_plugins": ["mlu_hijack = vllm_mlu:register_mlu_hijack"],
    },
)
|
||||
Reference in New Issue
Block a user