2025-04-03 14:52:34 +08:00
|
|
|
# Minimum CMake release this script is written against. It is declared ahead
# of project() so that policy defaults are in place before the project is set up.
cmake_minimum_required(VERSION 3.16)

# Top-level project; the name matches the Python extension module built below.
project(vllm_ascend_C)
|
|
|
|
|
|
|
|
|
|
# include(CheckCXXCompilerFlag)
|
|
|
|
|
# check_cxx_compiler_flag("-std=c++17", COMPILER_SUPPORTS_CXX17)
|
Add sleep mode feature for Ascend NPU (#513)
### What this PR does / why we need it?
This PR adds a sleep mode feature to vllm-ascend. When sleeping, we
mainly do two things:
- offload model weights
- discard kv cache
RLHF tools (such as https://github.com/volcengine/verl and
https://github.com/OpenRLHF/OpenRLHF) strongly need sleep mode
to accelerate the training process.
This PR may solve #375 and #320.
### Does this PR introduce _any_ user-facing change?
No existing user interfaces changed.
Users will have two new methods (`sleep()` and `wake_up()`) to use.
### How was this patch tested?
This PR is tested with Qwen/Qwen2.5-0.5B-Instruct.
At first, the free NPU memory is M1.
After `llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)`
is executed, the free NPU memory is M2, with M2 < M1.
Then we call `llm.sleep(level=1)`, after which the free NPU memory is M3.
We observe M3 > M2, and M3 is very close to M1.
Plus, we have the same output tokens before sleep and after wake up,
with the config of `SamplingParams(temperature=0, max_tokens=10)` and
with the same input tokens of course.
This PR reuses the CMake procedure from #371, thanks a lot.
Signed-off-by: Shuqiao Li <celestialli@outlook.com>
2025-04-18 13:11:39 +08:00
|
|
|
# Build all C++ targets as C++17. Marking the standard as required makes
# configuration fail loudly instead of silently falling back to an older
# standard when the compiler cannot provide C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
2025-04-03 14:52:34 +08:00
|
|
|
|
|
|
|
|
# Load the project-local helper script that sits next to this file.
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)

# Reference VLLM_PYTHON_PATH once so that CMake does not warn about an unused
# manually-specified variable. The value stored in ignoreMe is never read in
# this file.
set(ignoreMe "${VLLM_PYTHON_PATH}")
|
|
|
|
|
|
2025-04-27 17:28:29 +08:00
|
|
|
# Python versions accepted by this build.
# TODO: re-add "3.12" once torch-npu supports Python 3.12.
set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11")
|
2025-04-03 14:52:34 +08:00
|
|
|
|
|
|
|
|
# pybind11 supplies pybind11_add_module(), used further down to build the
# Python extension.
find_package(pybind11 REQUIRED)

# Helper invoked with the module name "torch" and the Python expression
# "torch.utils.cmake_prefix_path".
# NOTE(review): presumably defined in cmake/utils.cmake and extends
# CMAKE_PREFIX_PATH so that find_package(Torch) succeeds -- confirm.
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")

# Snapshot of the install prefix as it stands at this point in the script;
# this value is what install() uses as its destination at the end of the file.
set(VLLM_ASCEND_INSTALL_PATH "${CMAKE_INSTALL_PREFIX}")

# Torch is mandatory; TORCH_INCLUDE_DIRS and TORCH_LIBRARIES are consumed below.
find_package(Torch REQUIRED)
|
|
|
|
|
|
|
|
|
|
# Run mode selector kept in the cache (documented values: cpu/sim/npu);
# defaults to "npu" when no value has been cached yet.
set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu")

# Copy the incoming SOC_VERSION into a normal variable and report it.
# NOTE(review): the value is presumably supplied by the caller
# (e.g. -DSOC_VERSION=...); nothing in this file detects it -- confirm.
set(SOC_VERSION ${SOC_VERSION})
message(STATUS "Detected SOC version: ${SOC_VERSION}")
|
|
|
|
|
|
|
|
|
|
# Fall back to a Release build when no build type was requested.
if (NOT CMAKE_BUILD_TYPE)
    # The cache entry type must be STRING; the previous "STRINGS" is not a
    # valid cache entry type.
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type Release/Debug (default Release)" FORCE)
endif()
|
|
|
|
|
|
|
|
|
|
# When the install prefix is still the stock /usr/local, point it at a
# project-local "out" directory instead.
# NOTE(review): this set() carries no FORCE, so it will not replace a
# CMAKE_INSTALL_PREFIX entry that already exists in the cache -- confirm the
# override takes effect as intended. Also note that VLLM_ASCEND_INSTALL_PATH
# is captured from CMAKE_INSTALL_PREFIX earlier in this file.
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr/local")
    # The cache entry type must be STRING; the previous "STRINGS" is not a
    # valid cache entry type.
    set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path to install()")
endif()
|
|
|
|
|
|
|
|
|
|
# Expose the toolkit root under the name ASCEND_CANN_PACKAGE_PATH.
# NOTE(review): presumably read by the ascendc.cmake script included below -- confirm.
set(ASCEND_CANN_PACKAGE_PATH "${ASCEND_HOME_PATH}")

# Locate the AscendC kernel CMake scripts. Three layouts under
# ASCEND_HOME_PATH are probed in order and the first existing directory wins;
# configuration aborts when none is present. Every path is quoted so that an
# ASCEND_HOME_PATH containing spaces or semicolons stays a single argument.
if(EXISTS "${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake")
    set(ASCENDC_CMAKE_DIR "${ASCEND_HOME_PATH}/tools/tikcpp/ascendc_kernel_cmake")
elseif(EXISTS "${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake")
    set(ASCENDC_CMAKE_DIR "${ASCEND_HOME_PATH}/compiler/tikcpp/ascendc_kernel_cmake")
elseif(EXISTS "${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake")
    set(ASCENDC_CMAKE_DIR "${ASCEND_HOME_PATH}/ascendc_devkit/tikcpp/samples/cmake")
else()
    message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.")
endif()

# Load the AscendC build rules; ascendc_library() is used right after this.
include("${ASCENDC_CMAKE_DIR}/ascendc.cmake")
|
|
|
|
|
# Kernel sources are listed explicitly. Globbing a literal file name would
# silently produce an empty list if the file were missing or renamed; with an
# explicit list the missing source is reported instead.
set(KERNEL_FILES
    "${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/pos_encoding_kernels.cpp"
)

# Shared library holding the AscendC kernels; linked into vllm_ascend_C below.
ascendc_library(vllm_ascend_kernels SHARED
    ${KERNEL_FILES}
)
|
|
|
|
|
|
|
|
|
|
# Print the torch-npu location that the include and link paths below rely on.
message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")

# Host-side sources of the extension: every .cpp file directly under csrc/.
# CONFIGURE_DEPENDS makes the build re-check the glob so that added or removed
# files trigger a re-configure instead of being missed.
file(GLOB VLLM_ASCEND_SRC CONFIGURE_DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp)
|
|
|
|
|
|
|
|
|
|
# Header search paths, applied at directory scope: pybind11, Python, Torch,
# torch-npu and the Ascend toolkit. Both the aarch64 and the x86_64 platform
# header directories are listed unconditionally.
# NOTE(review): PYTHON_INCLUDE_PATH is not set anywhere in this file --
# confirm where it comes from.
include_directories(
    ${pybind11_INCLUDE_DIRS}
    ${PYTHON_INCLUDE_PATH}
    ${TORCH_INCLUDE_DIRS}
    ${TORCH_NPU_PATH}/include
    ${ASCEND_HOME_PATH}/include
    ${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
    ${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform
)
|
|
|
|
|
|
|
|
|
|
# Collect a list of include directories in INCLUDES.
# NOTE(review): INCLUDES is not referenced again in this file, and
# TORCH_NPU_INCLUDE_DIRS is not set here (the include_directories() call
# uses ${TORCH_NPU_PATH}/include instead) -- confirm whether an included
# script consumes this variable.
set(INCLUDES
    ${TORCH_INCLUDE_DIRS}
    ${TORCH_NPU_INCLUDE_DIRS}
    ${ASCEND_HOME_PATH}/include
    ${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
)
|
|
|
|
|
|
|
|
|
|
# Build the Python extension module from the globbed csrc/ sources.
pybind11_add_module(vllm_ascend_C ${VLLM_ASCEND_SRC})

# Library search paths used only when linking this module: the torch-npu
# lib directory and the Ascend toolkit's lib64 directory.
target_link_directories(vllm_ascend_C
    PRIVATE
        ${TORCH_NPU_PATH}/lib/
        ${ASCEND_HOME_PATH}/lib64
)
|
|
|
|
|
|
|
|
|
|
# Libraries linked into the extension: Torch, torch-npu, the AscendC kernel
# library built above, and the by-name libraries ascendcl and platform.
# NOTE(review): libtorch_npu.so, ascendcl and platform are given by name, so
# they are presumably resolved through the link directories set on this
# target -- confirm.
target_link_libraries(vllm_ascend_C
    PUBLIC
        ${TORCH_LIBRARIES}
        libtorch_npu.so
        vllm_ascend_kernels
        ascendcl
        platform
)
|
|
|
|
|
|
|
|
|
|
# Embed a runtime search path relative to the module's own location
# ($ORIGIN and $ORIGIN/lib).
target_link_options(vllm_ascend_C
    PRIVATE
        "-Wl,-rpath,$ORIGIN:$ORIGIN/lib"
)

# Install the extension module and the kernel library side by side into the
# install path captured earlier in this file.
install(
    TARGETS vllm_ascend_C vllm_ascend_kernels
    DESTINATION ${VLLM_ASCEND_INSTALL_PATH}
)
|
|
|
|
|
|
|
|
|
|
|