cmake_minimum_required(VERSION 3.26)

# When building directly using CMake, make sure you run the install step
# (it places the .so files in the correct location).
#
# Example:
# mkdir build && cd build
# cmake -G Ninja -DVLLM_PYTHON_EXECUTABLE=`which python3` -DCMAKE_INSTALL_PREFIX=.. ..
# cmake --build . --target install
#
# If you want to only build one target, make sure to install it manually:
# cmake --build . --target _C
# cmake --install . --component _C
project(vllm_extensions LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CUDA by default, can be overridden by using -DVLLM_TARGET_DEVICE=... (used by setup.py)
set(VLLM_TARGET_DEVICE "cuda" CACHE STRING "Target device backend for vLLM")

message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)

# Suppress potential warnings about unused manually-specified variables
set(ignoreMe "${VLLM_PYTHON_PATH}")

# Prevent installation of dependencies (cutlass) by default.
install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)

#
# Supported python versions. These versions will be searched in order, the
# first match will be selected. These should be kept in sync with setup.py.
#
set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")

# ROCm installation prefix. Defaults to /opt/rocm; override via
# -DROCM_PATH=/your/rocm/path when invoking cmake.
#
# A plain (non-FORCE) cache set is sufficient here: it never overwrites a
# user-provided -DROCM_PATH cache entry, and a normal ROCM_PATH variable set
# by an enclosing scope still shadows the cache entry in this file. The
# previous if(NOT DEFINED ...)/else() branch re-cached the value with FORCE,
# stomping user cache configuration for no benefit.
set(ROCM_PATH "/opt/rocm" CACHE PATH "ROCm installation prefix")

#
# Try to find python package with an executable that exactly matches
# `VLLM_PYTHON_EXECUTABLE` and is one of the supported versions.
#
if(VLLM_PYTHON_EXECUTABLE)
  find_python_from_executable(${VLLM_PYTHON_EXECUTABLE} "${PYTHON_SUPPORTED_VERSIONS}")
else()
  message(FATAL_ERROR
    "Please set VLLM_PYTHON_EXECUTABLE to the path of the desired python version"
    " before running cmake configure.")
endif()

#
# Update cmake's `CMAKE_PREFIX_PATH` with torch location.
#
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")

#
# Import torch cmake configuration.
# Torch also imports CUDA (and partially HIP) languages with some customizations,
# so there is no need to do this explicitly with check_language/enable_language,
# etc.
#
find_package(Torch REQUIRED)

#
# Forward the non-CUDA device extensions to external CMake scripts and stop
# processing this file: everything below is GPU (CUDA/ROCm) specific.
#
# NOTE(review): this guard returns for *every* device that is not cuda/rocm
# (including "musa"), so the dispatch chain below can only ever see cuda or
# rocm. The previous elseif branches for "cpu" and "musa" down there were
# unreachable dead code and have been removed; the redundant return() inside
# the else() (immediately followed by an unconditional return()) is gone too.
#
if(NOT VLLM_TARGET_DEVICE STREQUAL "cuda" AND
   NOT VLLM_TARGET_DEVICE STREQUAL "rocm")
  if(VLLM_TARGET_DEVICE STREQUAL "cpu")
    include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
  endif()
  return()
endif()

#
# Set up GPU language and check the torch version and warn if it isn't
# what is expected.
#
if(VLLM_TARGET_DEVICE STREQUAL "cuda")
  # Include CUDA specific configuration
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/cuda.cmake)
elseif(VLLM_TARGET_DEVICE STREQUAL "rocm")
  # Include ROCm specific configuration
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/hip.cmake)
else()
  # Unreachable given the guard above; kept as a defensive failure.
  message(FATAL_ERROR "Can't find CUDA or HIP installation.")
endif()

# Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process.
include(FetchContent)

# FetchContent populates its downloads under FETCHCONTENT_BASE_DIR (a cache
# variable defined by the module above); make sure the directory exists.
file(MAKE_DIRECTORY "${FETCHCONTENT_BASE_DIR}")
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")

#
# Define other extension targets
#

#
# cumem_allocator extension
#
# Architecture-specific cumem configurations are included from
# cmake/cuda.cmake or cmake/hip.cmake.
#
if(VLLM_GPU_LANG MATCHES "^(CUDA|HIP)$")
  message(STATUS "Enabling cumem allocator extension.")
  define_extension_target(
    cumem_allocator
    DESTINATION vllm
    LANGUAGE CXX
    SOURCES ${VLLM_CUMEM_EXT_SRC}
    LIBRARIES ${CUMEM_LIBS}
    USE_SABI 3.8
    WITH_SOABI)
endif()

#
# _C extension
#
# VLLM_EXT_SRC is defined in the architecture-specific cmake files
# (cuda.cmake or hip.cmake).
#
message(STATUS "Enabling C extension.")
define_extension_target(
  _C
  DESTINATION vllm
  LANGUAGE ${VLLM_GPU_LANG}
  SOURCES ${VLLM_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
  INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
  USE_SABI 3
  WITH_SOABI)

# If CUTLASS is compiled on NVCC >= 12.5, it by default uses
# cudaGetDriverEntryPointByVersion as a wrapper to avoid directly calling the
# driver API. This causes problems when linking with earlier versions of CUDA.
# Setting this variable sidesteps the issue by calling the driver directly.
target_compile_definitions(_C PRIVATE CUTLASS_ENABLE_DIRECT_CUDA_DRIVER_CALL=1)

#
# _moe_C extension
#
# Architecture-specific MOE configurations are included from cmake/cuda.cmake
# or cmake/hip.cmake.
#
message(STATUS "Enabling moe extension.")
define_extension_target(
  _moe_C
  DESTINATION vllm
  LANGUAGE ${VLLM_GPU_LANG}
  SOURCES ${VLLM_MOE_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR}
  INCLUDE_DIRECTORIES ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
  USE_SABI 3
  WITH_SOABI)

# Architecture-specific ROCm configurations are included from cmake/hip.cmake

# All include() calls below use ${CMAKE_CURRENT_LIST_DIR} for consistency with
# the rest of this file. A bare relative path in include() is resolved against
# the current source directory, which only coincides with this file's
# directory when this is the top-level CMakeLists.txt; anchoring on the list
# dir keeps the includes correct regardless of how the file is consumed.

# For CUDA and HIP builds also build the triton_kernels external package.
if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/external_projects/triton_kernels.cmake)
endif()

# For CUDA we also build and ship some external projects.
if(VLLM_GPU_LANG STREQUAL "CUDA")
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/external_projects/flashmla.cmake)
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/external_projects/qutlass.cmake)

  # vllm-flash-attn should be last as it overwrites some CMake functions
  include(${CMAKE_CURRENT_LIST_DIR}/cmake/external_projects/vllm_flash_attn.cmake)
endif()