forked from EngineX-Cambricon/enginex-mlu370-vllm
126 lines
3.5 KiB
Python
126 lines
3.5 KiB
Python
import importlib.util
|
|
import io
|
|
import logging
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import warnings
|
|
from shutil import which
|
|
from typing import Dict, List
|
|
|
|
from packaging.version import Version, parse
|
|
from setuptools import Extension, find_packages, setup
|
|
from setuptools.command.build_ext import build_ext
|
|
|
|
|
|
# Directory portion of this script's path; get_path() joins onto it.
ROOT_DIR = os.path.dirname(__file__)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_path(*filepath) -> str:
    """Return the given path components joined onto ``ROOT_DIR``."""
    anchored = (ROOT_DIR, *filepath)
    return os.path.join(*anchored)
|
|
|
|
|
|
def get_vllm_version() -> str:
    """Build the package version string from ``../vllm/version.py``.

    The file (located via ``get_path``) is read as text and searched for
    assignments of the form ``__version__ = "..."`` and
    ``__vllm_mlu_version__ = "..."``. When a name is assigned more than
    once, the last match in the file is used.

    Returns:
        The string ``"<vllm_mlu_version>+vllm<vllm_version>"``.

    Raises:
        RuntimeError: If either assignment cannot be found in the file.
    """
    # Python source files are UTF-8 by default, so decode explicitly instead
    # of depending on the locale's preferred encoding.
    with open(get_path("../vllm/version.py"), "r", encoding="utf-8") as file:
        content = file.read()

    version_pattern = r'__version__\s*=\s*"([^"]+)"'
    vllm_mlu_version_pattern = r'__vllm_mlu_version__\s*=\s*"([^"]+)"'

    version_matches = re.findall(version_pattern, content)
    vllm_mlu_version_matches = re.findall(vllm_mlu_version_pattern, content)

    # Raise explicitly rather than ``assert``: assertions are stripped under
    # ``python -O``, which would let an empty match list fall through to an
    # unhelpful IndexError below.
    if not (version_matches and vllm_mlu_version_matches):
        raise RuntimeError("fail to get vllm and vllm_mlu version.")

    vllm_version = version_matches[-1]
    vllm_mlu_version = vllm_mlu_version_matches[-1]
    return f"{vllm_mlu_version}+vllm{vllm_version}"
|
|
|
|
|
|
def read_readme() -> str:
    """Return the text of ``README.md`` under ``ROOT_DIR``, if present.

    Returns:
        The README contents decoded as UTF-8, or an empty string when the
        file does not exist.
    """
    readme_path = get_path("README.md")
    if not os.path.isfile(readme_path):
        return ""
    # Read inside a context manager so the file handle is closed as soon as
    # the contents have been loaded.
    with open(readme_path, "r", encoding="utf-8") as readme:
        return readme.read()
|
|
|
|
|
|
class cmake_build_ext(build_ext):
    """``build_ext`` command that builds ``device_info`` through CMake."""

    def run(self):
        """Configure and build the ``device_info`` project with CMake.

        Does nothing when no ``device_info`` path exists relative to the
        current working directory. The base-class ``run`` is not invoked
        here; only the two CMake commands are spawned.
        """
        source_dir = 'device_info'
        if not os.path.exists(source_dir):
            return

        build_temp = os.path.join(source_dir, 'build')
        if not os.path.exists(build_temp):
            os.makedirs(build_temp)

        # Configure step: generate the build tree, pointing CMake at the
        # headers and libraries under /usr/local/neuware.
        configure_cmd = ['cmake', '-S', source_dir, '-B', build_temp]
        configure_cmd += [
            '-DCMAKE_INCLUDE_PATH=/usr/local/neuware/include',
            '-DCMAKE_LIBRARY_PATH=/usr/local/neuware/lib64',
        ]
        self.spawn(configure_cmd)

        # Build step: compile whatever the configure step generated.
        self.spawn(['cmake', '--build', build_temp])
|
|
|
|
def _find_device_info_file() -> bool:
    """Return True when ``./device_info`` exists in the working directory."""
    return os.path.exists("./device_info")
|
|
|
|
|
|
# Start with no extension modules and no custom commands; both are filled in
# below only when the device_info sources are present.
ext_modules = []
cmdclass = {}

if _find_device_info_file():
    # Declare the extension and route ``build_ext`` through the CMake-based
    # command class.
    ext_modules.append(
        Extension(
            'get_device_info',
            sources=['device_info/get_device_info.cpp'],
        )
    )
    cmdclass["build_ext"] = cmake_build_ext
|
|
|
|
|
|
setup(
    # --- Identity -----------------------------------------------------------
    name="vllm_mlu",
    version=get_vllm_version(),
    author="Cambricon vLLM Team",
    license="Apache 2.0",
    # --- Descriptions -------------------------------------------------------
    description=(
        "A high-throughput and memory-efficient inference and "
        "serving engine for LLMs on MLU backend"
    ),
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    # --- Links --------------------------------------------------------------
    url="",
    project_urls={
        "Homepage": "https://github.com/vllm-project/vllm",
        "Documentation": "https://vllm.readthedocs.io/en/latest/",
    },
    # --- Trove classifiers --------------------------------------------------
    classifiers=[
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "License :: OSI Approved :: Apache Software License",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    # --- Contents and build hooks -------------------------------------------
    packages=find_packages(),
    python_requires=">=3.8",
    # Both values are chosen earlier in this script depending on whether the
    # device_info sources were found.
    ext_modules=ext_modules,
    cmdclass=cmdclass,
)
|