xc-llm-ascend/CMakeLists.txt

# Minimum CMake version for this build; declared before project() so that
# policy defaults are in place when the project is set up.
cmake_minimum_required(VERSION 3.16)
project(vllm_ascend_C)

# Compile C++ sources in this directory as C++17. Marking the standard as
# required makes configuration fail up front when the compiler cannot provide
# C++17, instead of letting CMake quietly decay to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Load the project's CMake helper script.
# NOTE(review): append_cmake_prefix_path() used below is presumably defined in
# this helper file — confirm.
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)

# Suppress potential warnings about unused manually-specified variables
set(ignoreMe "${VLLM_PYTHON_PATH}")

# TODO: Add 3.12 back when torch-npu supports 3.12
# NOTE(review): PYTHON_SUPPORTED_VERSIONS is not read in the visible part of
# this file; presumably consumed by a helper or kept for reference — verify.
set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11")

# pybind11 supplies pybind11_add_module(), which builds the extension below.
find_package(pybind11 REQUIRED)

# Must run before find_package(Torch).
# NOTE(review): presumably asks the Python package "torch" for
# torch.utils.cmake_prefix_path and adds it to the package search path — confirm
# against the helper's definition.
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")
# Capture the install prefix as it is at this point; the install() rule at the
# end of the file uses this value as its destination.
set(VLLM_ASCEND_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}")

# Torch provides TORCH_INCLUDE_DIRS and TORCH_LIBRARIES used further down.
find_package(Torch REQUIRED)

# User-overridable cache entry; its help text lists cpu/sim/npu, default "npu".
# NOTE(review): presumably read by the AscendC CMake module included later —
# confirm.
set(RUN_MODE
    "npu"
    CACHE STRING
    "cpu/sim/npu"
)

# Re-assign SOC_VERSION from whatever value it currently holds (for example one
# passed with -D on the command line), then report it in the configure log.
set(SOC_VERSION
    ${SOC_VERSION}
)
message(STATUS
    "Detected SOC version: ${SOC_VERSION}"
)

# Default to a Release build when no build type was chosen. FORCE is used so
# the default replaces an empty CMAKE_BUILD_TYPE entry that may already exist
# in the cache. The cache type must be STRING: "STRINGS" is not a valid cache
# entry type and only works through an implicit conversion that raises an
# author warning.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type Release/Debug (default Release)" FORCE)
endif()

# When the install prefix is still /usr/local, try to point it at <source>/out.
# NOTE(review): CMAKE_INSTALL_PREFIX is normally already present in the cache at
# this point, and set(... CACHE ...) without FORCE does not overwrite an
# existing cache value, so this likely has no effect. VLLM_ASCEND_INSTALL_PATH
# is also captured from CMAKE_INSTALL_PREFIX earlier in the file. Confirm the
# intended behaviour before adding FORCE.
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
  set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE PATH "path to install()")
endif()

# Find the directory inside the CANN package (rooted at ASCEND_HOME_PATH) that
# holds the AscendC kernel CMake support files.
set(ASCEND_CANN_PACKAGE_PATH ${ASCEND_HOME_PATH})

# Candidate locations are probed in order of preference; the first directory
# that exists is used and the remaining ones are not checked.
set(ascendc_cmake_dir_found FALSE)
foreach(ascendc_cmake_candidate
    ${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake
    ${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake
    ${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake)
  if(EXISTS ${ascendc_cmake_candidate})
    set(ASCENDC_CMAKE_DIR ${ascendc_cmake_candidate})
    set(ascendc_cmake_dir_found TRUE)
    break()
  endif()
endforeach()

# None of the known layouts matched: stop configuring with a clear message.
if(NOT ascendc_cmake_dir_found)
  message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.")
endif()

# Load the AscendC build support from the directory stored in ASCENDC_CMAKE_DIR.
# NOTE(review): ascendc_library() is presumably defined by this module — confirm.
include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)

# Kernel sources are listed explicitly. The path has no wildcard, so globbing
# added nothing and would silently produce an empty list if the file were
# missing; an explicit entry lets the missing file be reported by name.
set(KERNEL_FILES
    ${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/pos_encoding_kernels.cpp
)

# Shared library containing the custom AscendC kernels.
ascendc_library(vllm_ascend_kernels SHARED
    ${KERNEL_FILES}
)

# Report the torch-npu location at STATUS level, matching the other
# informational message in this file.
message(STATUS "TORCH_NPU_PATH is ${TORCH_NPU_PATH}")

# Collect every .cpp file directly under csrc/ (non-recursive).
# CONFIGURE_DEPENDS makes the build re-check the glob so that added or removed
# sources trigger a reconfigure instead of being missed.
file(GLOB VLLM_ASCEND_SRC CONFIGURE_DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp)

# Header search paths, applied at directory scope so they reach the targets
# defined in this CMakeLists.txt. Both the aarch64 and x86_64 platform header
# directories of the Ascend toolkit are listed.
set(vllm_ascend_header_dirs
    ${pybind11_INCLUDE_DIRS}
    ${PYTHON_INCLUDE_PATH}
    ${TORCH_INCLUDE_DIRS}
    ${TORCH_NPU_PATH}/include
    ${ASCEND_HOME_PATH}/include
    ${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
    ${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform
)
include_directories(${vllm_ascend_header_dirs})

# NOTE(review): INCLUDES is not referenced in the visible part of this file, and
# TORCH_NPU_INCLUDE_DIRS is not set here — verify whether anything consumes it.
set(INCLUDES
    ${TORCH_INCLUDE_DIRS}
    ${TORCH_NPU_INCLUDE_DIRS}
    ${ASCEND_HOME_PATH}/include
    ${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform)

# Python extension module built from the sources held in VLLM_ASCEND_SRC.
pybind11_add_module(vllm_ascend_C ${VLLM_ASCEND_SRC})

# Embed a runtime library search path relative to the module's own location
# ($ORIGIN and $ORIGIN/lib).
target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")

# Directories searched at link time for the libraries named by bare name below.
target_link_directories(vllm_ascend_C PRIVATE
  ${TORCH_NPU_PATH}/lib/
  ${ASCEND_HOME_PATH}/lib64
)

# Torch, torch-npu, the custom kernel library built in this project, and the
# Ascend libraries "ascendcl" and "platform".
target_link_libraries(vllm_ascend_C PUBLIC
  ${TORCH_LIBRARIES}
  libtorch_npu.so
  vllm_ascend_kernels
  ascendcl
  platform
)

# Install the extension module together with the kernel library it links to,
# both into the directory held in VLLM_ASCEND_INSTALL_PATH.
install(
  TARGETS
    vllm_ascend_C
    vllm_ascend_kernels
  DESTINATION ${VLLM_ASCEND_INSTALL_PATH}
)
[core] Support custom ascendc kernels in vllm-ascend (#233) This PR add custom ascendc kernel rotary_embedding support in vllm-ascend, related CMakeLists and setuptools is also added in this PR. Related: https://github.com/vllm-project/vllm-ascend/issues/156 --------- Signed-off-by: ganyi <pleaplusone.gy@gmail.com> 2025-04-03 14:52:34 +08:00			`cmake_minimum_required(VERSION 3.16)`
			`project(vllm_ascend_C)`

			`# include(CheckCXXcompilerFlag)`
			`# check_cxx_compiler_flag("-std=c++17", COMPILER_SUPPORTS_CXX17)`
Add sleep mode feature for Ascend NPU (#513) ### What this PR does / why we need it? This PR adds sleep mode feature for vllm-ascend, when sleeps, we do mainly two things: - offload model weights - discard kv cache RLHF tools(such as https://github.com/volcengine/verl and https://github.com/OpenRLHF/OpenRLHF) have a strong need of sleep mode to accelerate the training process. This PR may solve #375 and #320 . ### Does this PR introduce _any_ user-facing change? No existing user interfaces changed. Users will have two new methods(`sleep()` and `wake_up()`) to use. ### How was this patch tested? This PR is tested with Qwen/Qwen2.5-0.5B-Instruct. At first, we have free NPU memory M1. After `llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)` executed, we have free NPU memory M2. M2 < M1. Then we call `llm.sleep(level=1)`, we have free NPU memory M3. We have M3 > M2, M3 is very close to M1. Plus, we have the same output tokens before sleep and after wake up, with the config of `SamplingParams(temperature=0, max_tokens=10)` and with the same input tokens of course. This PR is utilizing the CMake procedure of #371 , thanks a lot. Signed-off-by: Shuqiao Li <celestialli@outlook.com> 2025-04-18 13:11:39 +08:00			`set(CMAKE_CXX_STANDARD 17)`
[core] Support custom ascendc kernels in vllm-ascend (#233) This PR add custom ascendc kernel rotary_embedding support in vllm-ascend, related CMakeLists and setuptools is also added in this PR. Related: https://github.com/vllm-project/vllm-ascend/issues/156 --------- Signed-off-by: ganyi <pleaplusone.gy@gmail.com> 2025-04-03 14:52:34 +08:00
			`include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)`

			`# Suppress potential warnings about unused manually-specified variables`
			`set(ignoreMe "${VLLM_PYTHON_PATH}")`

[BUILD] Upgrade torch-npu to 2.5.1 (#661) ### What this PR does / why we need it? The torch-npu 2.5.1 are published: https://pypi.org/project/torch-npu/2.5.1/ It's time to remove all torch-npu dev version from vllm-ascend code base ### Does this PR introduce _any_ user-facing change? Yes, using torch-npu 2.5.1 ### How was this patch tested? - [ ] CI passed - [ ] Manually test - [ ] Grep all `dev2025` --------- Signed-off-by: Yikun Jiang <yikunkero@gmail.com> 2025-04-27 17:28:29 +08:00			`# TODO: Add 3.12 back when torch-npu support 3.12`
			`set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11")`
[core] Support custom ascendc kernels in vllm-ascend (#233) This PR add custom ascendc kernel rotary_embedding support in vllm-ascend, related CMakeLists and setuptools is also added in this PR. Related: https://github.com/vllm-project/vllm-ascend/issues/156 --------- Signed-off-by: ganyi <pleaplusone.gy@gmail.com> 2025-04-03 14:52:34 +08:00
			`find_package(pybind11 REQUIRED)`

			`append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")`
			`set(VLLM_ASCEND_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}")`

			`find_package(Torch REQUIRED)`

			`set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu")`
[CI] enable custom ops build (#466) ### What this PR does / why we need it? This PR enable custom ops build by default. ### Does this PR introduce _any_ user-facing change? Yes, users now install vllm-ascend from source will trigger custom ops build step. ### How was this patch tested? By image build and e2e CI --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2025-04-12 10:24:53 +08:00			`set(SOC_VERSION ${SOC_VERSION})`
[core] Support custom ascendc kernels in vllm-ascend (#233) This PR add custom ascendc kernel rotary_embedding support in vllm-ascend, related CMakeLists and setuptools is also added in this PR. Related: https://github.com/vllm-project/vllm-ascend/issues/156 --------- Signed-off-by: ganyi <pleaplusone.gy@gmail.com> 2025-04-03 14:52:34 +08:00			`message(STATUS "Detected SOC version: ${SOC_VERSION}")`

			`if (NOT CMAKE_BUILD_TYPE)`
			`set(CMAKE_BUILD_TYPE "Release" CACHE STRINGS "Build type Release/Debug (default Release)" FORCE)`
			`endif()`

			`if (CMAKE_INSTALL_PREFIX STREQUAL /usr/local)`
			`set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRINGS "path to install()")`
			`endif()`

			`set(ASCEND_CANN_PACKAGE_PATH ${ASCEND_HOME_PATH})`
			`if(EXISTS ${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake)`
			`set(ASCENDC_CMAKE_DIR ${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake)`
			`elseif(EXISTS ${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake)`
			`set(ASCENDC_CMAKE_DIR ${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake)`
			`elseif(EXISTS ${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake)`
			`set(ASCENDC_CMAKE_DIR ${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake)`
			`else()`
			`message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.")`
			`endif()`

			`include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)`
			`file(GLOB KERNEL_FILES`
			`${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/pos_encoding_kernels.cpp)`

			`ascendc_library(vllm_ascend_kernels SHARED`
			`${KERNEL_FILES}`
			`)`

			`message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")`

			`file(GLOB VLLM_ASCEND_SRC`
			`${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp)`

			`include_directories(`
			`${pybind11_INCLUDE_DIRS}`
			`${PYTHON_INCLUDE_PATH}`
			`${TORCH_INCLUDE_DIRS}`
			`${TORCH_NPU_PATH}/include`
			`${ASCEND_HOME_PATH}/include`
			`${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform`
			`${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform`
			`)`

			`set(`
			`INCLUDES`
			`${TORCH_INCLUDE_DIRS}`
			`${TORCH_NPU_INCLUDE_DIRS}`
			`${ASCEND_HOME_PATH}/include`
			`${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform`
			`)`

			`pybind11_add_module(vllm_ascend_C ${VLLM_ASCEND_SRC})`

			`target_link_directories(`
			`vllm_ascend_C`
			`PRIVATE`
			`${TORCH_NPU_PATH}/lib/`
			`${ASCEND_HOME_PATH}/lib64`
			`)`

			`target_link_libraries(`
			`vllm_ascend_C`
			`PUBLIC`
			`${TORCH_LIBRARIES}`
			`libtorch_npu.so`
			`vllm_ascend_kernels`
			`ascendcl`
			`platform`
			`)`

			`target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")`

			`install(TARGETS vllm_ascend_C vllm_ascend_kernels DESTINATION ${VLLM_ASCEND_INSTALL_PATH})`