2025-02-05 10:53:12 +08:00
|
|
|
#
|
|
|
|
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
|
|
|
# Copyright 2023 The vLLM team.
|
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
2025-04-17 14:59:56 +08:00
|
|
|
# This file is a part of the vllm-ascend project.
|
|
|
|
|
# Adapted from https://github.com/vllm-project/vllm/blob/main/setup.py
|
2025-02-05 10:53:12 +08:00
|
|
|
#
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
import importlib.util
|
|
|
|
|
import logging
|
2025-02-05 10:53:12 +08:00
|
|
|
import os
|
2025-04-03 14:52:34 +08:00
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
from sysconfig import get_paths
|
|
|
|
|
from typing import Dict, List
|
2025-02-05 10:53:12 +08:00
|
|
|
|
2025-11-28 18:06:39 +08:00
|
|
|
from setuptools import Command, Extension, find_packages, setup
|
2025-04-03 14:52:34 +08:00
|
|
|
from setuptools.command.build_ext import build_ext
|
2025-06-27 09:14:43 +08:00
|
|
|
from setuptools.command.build_py import build_py
|
2025-04-03 14:52:34 +08:00
|
|
|
from setuptools.command.develop import develop
|
|
|
|
|
from setuptools.command.install import install
|
2025-02-10 09:36:09 +08:00
|
|
|
from setuptools_scm import get_version
|
2025-02-05 10:53:12 +08:00
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
|
|
|
|
|
def load_module_from_path(module_name, path):
    """Import the Python file at *path* as a module named *module_name*.

    The module is registered in ``sys.modules`` before execution so that
    any self-referencing imports inside the file resolve correctly.
    Returns the loaded module object.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    loaded = importlib.util.module_from_spec(spec)
    # Register first, then execute: mirrors the normal import machinery.
    sys.modules[module_name] = loaded
    spec.loader.exec_module(loaded)
    return loaded
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory containing this setup.py; used as the base for all relative
# paths (csrc scripts, generated files, cmake source dir).
ROOT_DIR = os.path.dirname(__file__)

logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_or_set_default_env(cmake_args,
                             env_name,
                             env_variable,
                             default_path=""):
    """Append ``-D<env_name>=<value>`` to *cmake_args*, defaulting if unset.

    Args:
        cmake_args: list of cmake command-line arguments; mutated in place.
        env_name: name of the configuration variable (e.g. "ASCEND_HOME_PATH").
        env_variable: current value from the environment, or None if unset.
        default_path: fallback value used (with a warning) when unset.

    Returns:
        The same *cmake_args* list, with the new ``-D`` flag appended.
    """
    if env_variable is None:
        # Fixed typo in the warning message ("pleause" -> "please").
        logging.warning(
            f"No {env_name} found in your environment, please try to set {env_name} "
            "if you customize the installation path of this library, otherwise default "
            "path will be adapted during build this project")
        logging.warning(f"Set default {env_name}: {default_path}")
        env_variable = default_path
    else:
        logging.info(f"Found existing {env_name}: {env_variable}")
    # cann package seems will check this environments in cmake, need write this env variable back.
    if env_name == "ASCEND_HOME_PATH":
        os.environ["ASCEND_HOME_PATH"] = env_variable
    cmake_args += [f"-D{env_name}={env_variable}"]
    return cmake_args
|
|
|
|
|
|
|
|
|
|
|
[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379
Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-11-26 14:28:55 +08:00
|
|
|
def get_value_from_lines(lines: List[str], key: str) -> str:
    """Extract a ``key : value`` field from npu-smi style output.

    Scans *lines* for the first line containing *key* (after collapsing
    runs of whitespace) and returns the stripped text after the last
    ':' on that line. Returns "" when no line matches.
    """
    for raw in lines:
        normalized = ' '.join(raw.split())
        if key not in normalized:
            continue
        # Take the text after the LAST colon, like split(':')[-1].
        _, _, value = normalized.rpartition(':')
        return value.strip()
    return ""
|
|
|
|
|
|
|
|
|
|
|
2025-11-27 17:18:49 +08:00
|
|
|
def get_chip_type() -> str:
    """Detect the Ascend chip type by querying the `npu-smi` CLI.

    Builds a lowercase chip identifier (e.g. "910b", "310p...") from
    the board-info fields of the first reported NPU.

    Returns:
        The lowercase chip identifier, or "" when `npu-smi` is not
        installed (e.g. a CPU-only build machine).

    Raises:
        RuntimeError: `npu-smi` exists but one of its invocations failed.
        ValueError: the reported chip name is not recognized.
    """
    try:
        # `npu-smi info -l` lists devices; take the first reported NPU ID.
        npu_info_lines = subprocess.check_output(
            ['npu-smi', 'info', '-l']).decode().strip().split('\n')
        npu_id = int(get_value_from_lines(npu_info_lines, 'NPU ID'))
        # Query board details of that device to get the chip fields.
        chip_info_lines = subprocess.check_output(
            ['npu-smi', 'info', '-t', 'board', '-i',
             str(npu_id), '-c', '0']).decode().strip().split('\n')
        chip_name = get_value_from_lines(chip_info_lines, 'Chip Name')
        chip_type = get_value_from_lines(chip_info_lines, 'Chip Type')
        npu_name = get_value_from_lines(chip_info_lines, 'NPU Name')

        if "310" in chip_name:
            # 310P case
            assert chip_type
            return (chip_type + chip_name).lower()
        elif "910" in chip_name:
            if chip_type:
                # A2 case
                assert not npu_name
                return (chip_type + chip_name).lower()
            else:
                # A3 case
                assert npu_name
                return (chip_name + '_' + npu_name).lower()
        else:
            # TODO(zzzzwwjj): Currently, A5's chip name has not determined yet.
            raise ValueError(
                f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION"
            )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Get chip info failed: {e}")
    except FileNotFoundError:
        # Fixed typo in the warning message ("envir" -> "environment").
        logging.warning(
            "npu-smi command not found, if this is an npu environment, please check if npu driver is installed correctly."
        )
        return ""
|
[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379
Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-11-26 14:28:55 +08:00
|
|
|
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
# Load vllm_ascend/envs.py as a standalone module so build-time variables
# (SOC_VERSION, MAX_JOBS, ...) are available without importing the package
# (which may not be installable yet at build time).
envs = load_module_from_path("envs",
                             os.path.join(ROOT_DIR, "vllm_ascend", "envs.py"))

# Chip type auto-detected via npu-smi; "" when detection is unavailable.
soc_version = get_chip_type()
|
[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379
Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-11-26 14:28:55 +08:00
|
|
|
|
|
|
|
|
# Reconcile the user-provided SOC_VERSION env var with the auto-detected
# value: the env var wins, the detected value fills in when unset, and a
# mismatch between the two only produces a warning.
if not envs.SOC_VERSION:
    if not soc_version:
        raise RuntimeError(
            "Could not determine chip type automatically via 'npu-smi'. "
            "This can happen in a CPU-only environment. "
            "Please set the 'SOC_VERSION' environment variable to specify the target chip."
        )
    envs.SOC_VERSION = soc_version
else:
    if soc_version and envs.SOC_VERSION != soc_version:
        logging.warning(
            f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}"
        )
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
|
2025-11-17 19:13:04 +08:00
|
|
|
def gen_build_info():
    """Generate ``vllm_ascend/_build_info.py`` recording the device type.

    Maps the build-time SOC version (``envs.SOC_VERSION``) to a coarse
    device type (A2 / A3 / A5 / _310P) and writes it as ``__device_type__``
    so the runtime can branch on device type without needing torch_npu.
    """
    soc_version = envs.SOC_VERSION

    # Mapping from SOC version (either a coarse alias like "910b" or the
    # exact lowercase npu-smi / SOC_VERSION identifier) to device type.
    soc_to_device = {
        "910b": "A2",
        "910c": "A3",
        "310p": "_310P",
        "ascend910b1": "A2",
        "ascend910b2": "A2",
        "ascend910b2c": "A2",
        "ascend910b3": "A2",
        "ascend910b4": "A2",
        "ascend910b4-1": "A2",
        "ascend910_9391": "A3",
        "ascend910_9381": "A3",
        "ascend910_9372": "A3",
        "ascend910_9392": "A3",
        "ascend910_9382": "A3",
        "ascend910_9362": "A3",
        "ascend310p1": "_310P",
        "ascend310p3": "_310P",
        "ascend310p5": "_310P",
        "ascend310p7": "_310P",
        "ascend310p3vir01": "_310P",
        "ascend310p3vir02": "_310P",
        "ascend310p3vir04": "_310P",
        "ascend310p3vir08": "_310P",
        "ascend910_9579": "A5",
    }

    # NOTE(review): `assert` is stripped under `python -O`; an unknown SOC
    # version would then surface as a KeyError on the lookup below instead.
    assert soc_version in soc_to_device, f"Undefined soc_version: {soc_version}. Please file an issue to vllm-ascend."
    device_type = soc_to_device[soc_version]

    # Write the generated file into the source tree so both editable and
    # regular installs pick it up.
    package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py")
    with open(package_dir, "w+") as f:
        f.write('# Auto-generated file\n')
        f.write(f"__device_type__ = '{device_type}'\n")
    logging.info(f"Generated _build_info.py with SOC version: {soc_version}")
|
|
|
|
|
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
class CMakeExtension(Extension):
    """A setuptools Extension whose actual compilation is delegated to
    CMake (see ``cmake_build_ext``); it therefore carries no sources."""

    def __init__(self,
                 name: str,
                 cmake_lists_dir: str = ".",
                 **kwargs) -> None:
        # Empty sources: CMake owns the build of this extension.
        super().__init__(name, sources=[], py_limited_api=False, **kwargs)
        # Resolve once so later cmake invocations are cwd-independent.
        self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)
|
|
|
|
|
|
|
|
|
|
|
2025-11-17 19:13:04 +08:00
|
|
|
class custom_develop(develop):
    """`pip install -e` entry point: regenerate _build_info.py before the
    standard develop steps so editable installs see the right device type."""

    def run(self):
        gen_build_info()
        super().run()
|
|
|
|
|
|
|
|
|
|
|
2025-06-27 09:14:43 +08:00
|
|
|
class custom_build_info(build_py):
    """build_py wrapper: regenerate _build_info.py before packaging so the
    built wheel ships the correct device type."""

    def run(self):
        gen_build_info()
        super().run()
|
|
|
|
|
|
|
|
|
|
|
2025-11-28 18:06:39 +08:00
|
|
|
class build_and_install_aclnn(Command):
    """Custom setuptools command that compiles and installs the AclNN
    custom operators by invoking ``csrc/build_aclnn.sh``."""

    description = "Build and install AclNN by running build_aclnn.sh"
    user_options = []

    def initialize_options(self):
        # This command takes no options.
        pass

    def finalize_options(self):
        # This command takes no options.
        pass

    def run(self):
        """Run build_aclnn.sh with the project root and target SOC version;
        abort the build with the script's exit code on failure."""
        try:
            print("Running bash build_aclnn.sh ...")
            subprocess.check_call(
                ["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
            # Fixed typo in the success message ("buid" -> "build").
            print("build_aclnn.sh executed successfully!")
        except subprocess.CalledProcessError as e:
            print(f"Error running build_aclnn.sh: {e}")
            raise SystemExit(e.returncode)
|
|
|
|
|
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
class cmake_build_ext(build_ext):
    """build_ext implementation that drives the CMake configure / build /
    install cycle for the project's C++ and AscendC extensions."""

    # A dict of extension directories that have been configured.
    did_config: Dict[str, bool] = {}
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Determine number of compilation jobs
|
|
|
|
|
#
|
|
|
|
|
def compute_num_jobs(self):
|
|
|
|
|
# `num_jobs` is either the value of the MAX_JOBS environment variable
|
|
|
|
|
# (if defined) or the number of CPUs available.
|
|
|
|
|
num_jobs = envs.MAX_JOBS
|
|
|
|
|
if num_jobs is not None:
|
|
|
|
|
num_jobs = int(num_jobs)
|
|
|
|
|
logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
|
|
|
|
|
else:
|
|
|
|
|
try:
|
|
|
|
|
# os.sched_getaffinity() isn't universally available, so fall
|
|
|
|
|
# back to os.cpu_count() if we get an error here.
|
|
|
|
|
num_jobs = len(os.sched_getaffinity(0))
|
|
|
|
|
except AttributeError:
|
|
|
|
|
num_jobs = os.cpu_count()
|
|
|
|
|
num_jobs = max(1, num_jobs)
|
|
|
|
|
|
|
|
|
|
return num_jobs
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Perform cmake configuration for a single extension.
|
|
|
|
|
#
|
|
|
|
|
    def configure(self, ext: CMakeExtension) -> None:
        """Run the CMake configure step for *ext* in self.build_temp.

        Assembles the full list of -D cache definitions (build type,
        compilers, Ascend toolkit, Python, pybind11, SOC version,
        torch_npu location, FetchContent dir) and invokes cmake.

        Raises:
            RuntimeError: pybind11 lookup or cmake configuration failed.
        """
        build_temp = self.build_temp
        os.makedirs(build_temp, exist_ok=True)
        source_dir = os.path.abspath(ROOT_DIR)
        python_executable = sys.executable
        cmake_args = ["cmake"]
        # Default use release mode to compile the csrc code
        # Turbo now support compiled with Release, Debug and RelWithDebugInfo
        if envs.CMAKE_BUILD_TYPE is None or envs.CMAKE_BUILD_TYPE not in [
                "Debug",
                "Release",
                "RelWithDebugInfo",
        ]:
            envs.CMAKE_BUILD_TYPE = "Release"
        cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"]
        # Default dump the compile commands for lsp
        cmake_args += ["-DCMAKE_EXPORT_COMPILE_COMMANDS=1"]
        # Optional compiler overrides from the environment.
        if envs.CXX_COMPILER is not None:
            cmake_args += [f"-DCMAKE_CXX_COMPILER={envs.CXX_COMPILER}"]
        if envs.C_COMPILER is not None:
            cmake_args += [f"-DCMAKE_C_COMPILER={envs.C_COMPILER}"]
        if envs.VERBOSE:
            cmake_args += ["-DCMAKE_VERBOSE_MAKEFILE=ON"]

        # find ASCEND_HOME_PATH
        check_or_set_default_env(
            cmake_args,
            "ASCEND_HOME_PATH",
            envs.ASCEND_HOME_PATH,
            "/usr/local/Ascend/ascend-toolkit/latest",
        )

        # find PYTHON_EXECUTABLE
        check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE",
                                 sys.executable)

        # find PYTHON_INCLUDE_PATH
        check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH",
                                 get_paths()["include"])

        # ccache and ninja can not be applied at ascendc kernels now

        try:
            # if pybind11 is installed via pip
            pybind11_cmake_path = (subprocess.check_output(
                [python_executable, "-m", "pybind11",
                 "--cmakedir"]).decode().strip())
        except subprocess.CalledProcessError as e:
            # else specify pybind11 path installed from source code on CI container
            raise RuntimeError(f"CMake configuration failed: {e}")

        # Editable (develop) installs place the built libs directly into
        # the source tree; regular builds use build_lib.
        install_path = os.path.join(ROOT_DIR, self.build_lib)
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            install_path = os.path.join(ROOT_DIR, "vllm_ascend")
        # add CMAKE_INSTALL_PATH
        cmake_args += [f"-DCMAKE_INSTALL_PREFIX={install_path}"]

        cmake_args += [f"-DCMAKE_PREFIX_PATH={pybind11_cmake_path}"]

        # Coarse SOC aliases are expanded to a concrete CANN SOC version
        # for the kernel compiler; exact identifiers pass through as-is.
        soc_version_map = {
            "910b": "ascend910b1",
            "910c": "ascend910_9392",
            "310p": "ascend310p1",
        }
        CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION,
                                               envs.SOC_VERSION)
        cmake_args += [f"-DSOC_VERSION={CANN_SOC_VERSION}"]

        # Override the base directory for FetchContent downloads to $ROOT/.deps
        # This allows sharing dependencies between profiles,
        # and plays more nicely with sccache.
        # To override this, set the FETCHCONTENT_BASE_DIR environment variable.
        fc_base_dir = os.path.join(ROOT_DIR, ".deps")
        fc_base_dir = os.environ.get("FETCHCONTENT_BASE_DIR", fc_base_dir)
        cmake_args += ["-DFETCHCONTENT_BASE_DIR={}".format(fc_base_dir)]

        # Locate the installed torch-npu package via pip metadata.
        torch_npu_command = "python3 -m pip show torch-npu | grep '^Location:' | awk '{print $2}'"
        try:
            torch_npu_path = subprocess.check_output(
                torch_npu_command, shell=True).decode().strip()
            torch_npu_path += "/torch_npu"
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Retrieve torch version version failed: {e}")

        # add TORCH_NPU_PATH
        cmake_args += [f"-DTORCH_NPU_PATH={torch_npu_path}"]

        build_tool = []
        # TODO(ganyi): ninja and ccache support for ascend c auto codegen. now we can only use make build
        # if which('ninja') is not None:
        #     build_tool += ['-G', 'Ninja']
        # Default build tool to whatever cmake picks.

        cmake_args += [source_dir]
        logging.info(f"cmake config command: {cmake_args}")
        try:
            subprocess.check_call(cmake_args, cwd=self.build_temp)
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"CMake configuration failed: {e}")

        # NOTE(review): cmake is invoked a second time here with
        # cmake_args spliced in (which already begins with "cmake" and
        # ends with source_dir) — looks like a redundant re-configure;
        # confirm whether both invocations are intentional.
        subprocess.check_call(
            ["cmake", ext.cmake_lists_dir, *build_tool, *cmake_args],
            cwd=self.build_temp,
        )
|
|
|
|
|
|
|
|
|
|
def build_extensions(self) -> None:
|
|
|
|
|
# Ensure that CMake is present and working
|
|
|
|
|
try:
|
|
|
|
|
subprocess.check_output(["cmake", "--version"])
|
|
|
|
|
except OSError as e:
|
|
|
|
|
raise RuntimeError(f"Cannot find CMake executable: {e}")
|
|
|
|
|
|
|
|
|
|
# Create build directory if it does not exist.
|
|
|
|
|
if not os.path.exists(self.build_temp):
|
|
|
|
|
os.makedirs(self.build_temp)
|
|
|
|
|
|
|
|
|
|
targets = []
|
|
|
|
|
|
|
|
|
|
os.makedirs(os.path.join(self.build_lib, "vllm_ascend"), exist_ok=True)
|
|
|
|
|
|
|
|
|
|
def target_name(s: str) -> str:
|
|
|
|
|
return s.removeprefix("vllm_ascend.")
|
|
|
|
|
|
|
|
|
|
# Build all the extensions
|
|
|
|
|
for ext in self.extensions:
|
|
|
|
|
self.configure(ext)
|
|
|
|
|
targets.append(target_name(ext.name))
|
|
|
|
|
|
|
|
|
|
num_jobs = self.compute_num_jobs()
|
|
|
|
|
|
|
|
|
|
build_args = [
|
|
|
|
|
"--build",
|
|
|
|
|
".",
|
|
|
|
|
f"-j={num_jobs}",
|
|
|
|
|
*[f"--target={name}" for name in targets],
|
|
|
|
|
]
|
|
|
|
|
try:
|
|
|
|
|
subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
|
|
|
|
|
except OSError as e:
|
|
|
|
|
raise RuntimeError(f"Build library failed: {e}")
|
|
|
|
|
# Install the libraries
|
|
|
|
|
install_args = [
|
|
|
|
|
"cmake",
|
|
|
|
|
"--install",
|
|
|
|
|
".",
|
|
|
|
|
]
|
|
|
|
|
try:
|
|
|
|
|
subprocess.check_call(install_args, cwd=self.build_temp)
|
|
|
|
|
except OSError as e:
|
|
|
|
|
raise RuntimeError(f"Install library failed: {e}")
|
|
|
|
|
|
|
|
|
|
# copy back to build folder for editable build
|
|
|
|
|
if isinstance(self.distribution.get_command_obj("develop"), develop):
|
|
|
|
|
import shutil
|
|
|
|
|
for root, _, files in os.walk(self.build_temp):
|
|
|
|
|
for file in files:
|
|
|
|
|
if file.endswith(".so"):
|
|
|
|
|
src_path = os.path.join(root, file)
|
|
|
|
|
dst_path = os.path.join(self.build_lib, "vllm_ascend",
|
|
|
|
|
file)
|
|
|
|
|
shutil.copy(src_path, dst_path)
|
|
|
|
|
print(f"Copy: {src_path} -> {dst_path}")
|
|
|
|
|
|
2025-11-28 18:06:39 +08:00
|
|
|
# copy back _cann_ops_custom directory
|
|
|
|
|
src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend",
|
|
|
|
|
"_cann_ops_custom")
|
|
|
|
|
dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend",
|
|
|
|
|
"_cann_ops_custom")
|
|
|
|
|
if os.path.exists(src_cann_ops_custom):
|
|
|
|
|
import shutil
|
|
|
|
|
if os.path.exists(dst_cann_ops_custom):
|
|
|
|
|
shutil.rmtree(dst_cann_ops_custom)
|
|
|
|
|
shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom)
|
|
|
|
|
print(f"Copy: {src_cann_ops_custom} -> {dst_cann_ops_custom}")
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
    def run(self):
        """Build the ACLNN custom ops, then run the normal build_ext flow."""
        # First, ensure ACLNN custom-ops is built and installed.
        self.run_command("build_aclnn")
        # Then, run the standard build_ext command to compile the extensions
        super().run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class custom_install(install):
    """``install`` command that compiles the native extensions first."""

    def run(self):
        # Building the extensions up front guarantees the compiled
        # artifacts exist before the regular install steps copy files.
        self.run_command("build_ext")
        super().run()
|
|
|
|
|
|
|
|
|
|
|
2025-02-05 10:53:12 +08:00
|
|
|
# Absolute path of the directory containing this setup.py (the repo root).
ROOT_DIR = os.path.dirname(__file__)

try:
    # Derive the package version from git metadata via setuptools-scm and
    # write it to vllm_ascend/_version.py for runtime introspection.
    VERSION = get_version(write_to="vllm_ascend/_version.py")
except LookupError:
    # The checkout action in github action CI does not checkout the tag. It
    # only checks out the commit. In this case, we set a dummy version.
    VERSION = "0.0.0"

# The single native extension, built through the CMake-driven build_ext.
ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]
|
2025-04-03 14:52:34 +08:00
|
|
|
|
2025-02-05 10:53:12 +08:00
|
|
|
|
|
|
|
|
def get_path(*filepath) -> str:
    """Resolve *filepath* components relative to the repository root."""
    pieces = (ROOT_DIR, *filepath)
    return os.path.join(*pieces)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_readme() -> str:
    """Return the contents of README.md, or "" if it cannot be read."""
    try:
        # EAFP: open directly rather than pre-checking with isfile(), which
        # resolved the path twice and was racy between check and open.
        with open(get_path("README.md"), encoding="utf-8") as f:
            return f.read()
    except OSError:
        # Missing file (or a directory at that path) -> no long description.
        return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_requirements() -> List[str]:
    """Get Python package dependencies from requirements.txt.

    ``-r <file>`` includes are resolved recursively; pip option lines
    (``--...``), comments and blank lines are dropped.
    """

    def _read_requirements(filename: str) -> List[str]:
        # Read one requirements file, recursing into "-r" includes.
        with open(get_path(filename)) as f:
            lines = f.read().strip().split("\n")
        resolved_requirements = []
        for line in lines:
            line = line.strip()
            if line.startswith("-r "):
                resolved_requirements += _read_requirements(line.split()[1])
            elif not line or line.startswith(("--", "#")):
                # Skip blank lines, comments and pip option flags, none of
                # which are valid install_requires entries.
                continue
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    try:
        requirements = _read_requirements("requirements.txt")
    except ValueError:
        print("Failed to read requirements.txt in vllm_ascend.")
        # Previously `requirements` was left unbound here, turning the
        # failure into a NameError at the return below.
        requirements = []
    return requirements
|
|
|
|
|
|
|
|
|
|
|
2025-06-27 09:14:43 +08:00
|
|
|
# Map setuptools command names to the customized implementations above so
# develop/build/install all go through the ACLNN + CMake pipeline.
cmdclass = {
    "develop": custom_develop,
    "build_py": custom_build_info,
    "build_aclnn": build_and_install_aclnn,
    "build_ext": cmake_build_ext,
    "install": custom_install
}

setup(
    name="vllm_ascend",
    # Follow:
    # https://packaging.python.org/en/latest/specifications/version-specifiers
    version=VERSION,
    author="vLLM-Ascend team",
    license="Apache 2.0",
    description="vLLM Ascend backend plugin",
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/vllm-project/vllm-ascend",
    project_urls={
        "Homepage": "https://github.com/vllm-project/vllm-ascend",
    },
    # TODO: Add 3.12 back when torch-npu support 3.12
    classifiers=[
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: Apache Software License",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering :: Information Analysis",
    ],
    packages=find_packages(exclude=("docs", "examples", "tests*", "csrc")),
    python_requires=">=3.10",
    install_requires=get_requirements(),
    ext_modules=ext_modules,
    cmdclass=cmdclass,
    extras_require={},
    # Register vllm_ascend with vLLM's plugin discovery: the platform
    # backend plus the connector / model-loader / profiling hooks.
    entry_points={
        "vllm.platform_plugins": ["ascend = vllm_ascend:register"],
        "vllm.general_plugins": [
            "ascend_kv_connector = vllm_ascend:register_connector",
            "ascend_model_loader = vllm_ascend:register_model_loader",
            "ascend_service_profiling = vllm_ascend:register_service_profiling"
        ],
    },
)
|