# Consolidate the scattered CUDA/HIP/MUSA hardware-specific header references
# into the corresponding headers under the vendors directory to improve code
# maintainability; remove duplicated header references and optimize the build
# configuration.
cmake_minimum_required(VERSION 3.26)

#
# When building directly using CMake, make sure you run the install step
# (it places the .so files in the correct location).
#
# Example:
#   mkdir build && cd build
#   cmake -G Ninja -DVLLM_PYTHON_EXECUTABLE=`which python3` -DCMAKE_INSTALL_PREFIX=.. ..
#   cmake --build . --target install
#
# If you want to only build one target, make sure to install it manually:
#   cmake --build . --target _C
#   cmake --install . --component _C
#
project(vllm_extensions LANGUAGES CXX)

# Require C++17 project-wide; fail configuration rather than silently
# falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CUDA by default, can be overridden by using -DVLLM_TARGET_DEVICE=... (used by setup.py)
set(VLLM_TARGET_DEVICE "cuda" CACHE STRING "Target device backend for vLLM")

message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
# Shared helper functions (find_python_from_executable,
# append_cmake_prefix_path, define_extension_target, ...).
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)

# Suppress potential warnings about unused manually-specified variables
set(ignoreMe "${VLLM_PYTHON_PATH}")

# Prevent installation of dependencies (cutlass) by default.
install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
#
# Supported python versions. These versions will be searched in order, the
# first match will be selected. These should be kept in sync with setup.py.
#
set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")

# ROCm installation prefix. Defaults to /opt/rocm; override with
# -DROCM_PATH=/your/rocm/path when invoking cmake.
#
# A single plain CACHE set is sufficient here: it only takes effect when the
# user has not already provided a value (e.g. via -D), so the previous
# if(NOT DEFINED)/else() dance and its CACHE ... FORCE branch — which would
# stomp user-provided cache configuration — are unnecessary.
set(ROCM_PATH "/opt/rocm" CACHE PATH "ROCm installation prefix")
#
# Try to find python package with an executable that exactly matches
# `VLLM_PYTHON_EXECUTABLE` and is one of the supported versions.
#
if(VLLM_PYTHON_EXECUTABLE)
  find_python_from_executable(${VLLM_PYTHON_EXECUTABLE} "${PYTHON_SUPPORTED_VERSIONS}")
else()
  # A python interpreter is mandatory for the rest of the configuration.
  message(FATAL_ERROR
    "Please set VLLM_PYTHON_EXECUTABLE to the path of the desired python version"
    " before running cmake configure.")
endif()
#
# Update cmake's `CMAKE_PREFIX_PATH` with torch location.
#
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")

#
# Import torch cmake configuration.
# Torch also imports CUDA (and partially HIP) languages with some customizations,
# so there is no need to do this explicitly with check_language/enable_language,
# etc.
#
find_package(Torch REQUIRED)
#
# Forward the non-GPU device extensions to external CMake scripts.
#
# NOTE(fix): "musa" must fall through to the GPU-language dispatch below.
# Previously this early-return fired for every non-cuda/non-rocm device,
# which made the musa branch of that dispatch unreachable. The redundant
# inner else()/return() (shadowed by the unconditional return after endif)
# is also removed.
if(NOT VLLM_TARGET_DEVICE STREQUAL "cuda" AND
   NOT VLLM_TARGET_DEVICE STREQUAL "rocm" AND
   NOT VLLM_TARGET_DEVICE STREQUAL "musa")
  if(VLLM_TARGET_DEVICE STREQUAL "cpu")
    include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
  endif()
  # Non-GPU targets are fully handled above (or are unsupported), so stop
  # processing the rest of this file.
  return()
endif()
#
# Set up GPU language and check the torch version and warn if it isn't
# what is expected.
#
# Each backend's specifics (VLLM_GPU_LANG, flags, arches, sources) live in
# its own cmake module; this file only selects which one to pull in.
if(VLLM_TARGET_DEVICE STREQUAL "cuda")
  # CUDA specific configuration.
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/cuda.cmake)
elseif(VLLM_TARGET_DEVICE STREQUAL "rocm")
  # ROCm specific configuration.
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/hip.cmake)
elseif(VLLM_TARGET_DEVICE STREQUAL "cpu")
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
elseif(VLLM_TARGET_DEVICE STREQUAL "musa")
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/musa.cmake)
else()
  message(FATAL_ERROR "Can't find CUDA or HIP installation.")
endif()
# Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
include(FetchContent)
# Ensure the download/build directory exists before any FetchContent use.
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR})
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
#
# Define other extension targets
#

#
# cumem_allocator extension
# Architecture-specific cumem configurations are included from cmake/cuda.cmake
# or cmake/hip.cmake (they populate VLLM_CUMEM_EXT_SRC and CUMEM_LIBS).
#
if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
  message(STATUS "Enabling cumem allocator extension.")
  define_extension_target(
    cumem_allocator
    DESTINATION vllm
    LANGUAGE CXX
    SOURCES ${VLLM_CUMEM_EXT_SRC}
    LIBRARIES ${CUMEM_LIBS}
    USE_SABI 3.8
    WITH_SOABI)
endif()
#
# _C extension
#
# VLLM_EXT_SRC is defined in the architecture-specific cmake files
# (cuda.cmake or hip.cmake).
message(STATUS "Enabling C extension.")
define_extension_target(
  _C
  DESTINATION vllm
  LANGUAGE ${VLLM_GPU_LANG}
  SOURCES ${VLLM_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
  INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
  USE_SABI 3
  WITH_SOABI)

# If CUTLASS is compiled on NVCC >= 12.5, it by default uses
# cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the
# driver API. This causes problems when linking with earlier versions of CUDA.
# Setting this variable sidesteps the issue by calling the driver directly.
target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)
#
# _moe_C extension
# Architecture-specific MOE configurations (VLLM_MOE_EXT_SRC) are included
# from cmake/cuda.cmake or cmake/hip.cmake.
#
message(STATUS "Enabling moe extension.")
define_extension_target(
  _moe_C
  DESTINATION vllm
  LANGUAGE ${VLLM_GPU_LANG}
  SOURCES ${VLLM_MOE_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
  INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
  USE_SABI 3
  WITH_SOABI)

# Architecture-specific ROCm configurations are included from cmake/hip.cmake.
# For CUDA and HIP builds also build the triton_kernels external package.
if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
  include(cmake/external_projects/triton_kernels.cmake)
endif()

# For CUDA we also build and ship some external projects.
if(VLLM_GPU_LANG STREQUAL "CUDA")
  include(cmake/external_projects/flashmla.cmake)
  include(cmake/external_projects/qutlass.cmake)

  # vllm-flash-attn should be last as it overwrites some CMake functions
  include(cmake/external_projects/vllm_flash_attn.cmake)
endif()