adapt tensorrt llm custom all reduce to sgl-kernel (#2481)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
yizhang2077
2024-12-15 13:15:59 +08:00
committed by GitHub
parent 5f2595be43
commit e04d3f2897
13 changed files with 872 additions and 32 deletions

View File

@@ -1,47 +1,75 @@
cmake_minimum_required(VERSION 3.18)
project(sgl-kernel LANGUAGES CXX CUDA)
# Basic settings
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
find_package(PythonInterp 3 REQUIRED)
find_package(PythonLibs 3 REQUIRED)
# Set CUDA architectures
set(CMAKE_CUDA_ARCHITECTURES "75;80;86;89;90")
message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
# Find PyTorch
execute_process(
COMMAND ${PYTHON_EXECUTABLE} -c "import torch; print(torch.utils.cmake_prefix_path)"
COMMAND ${Python3_EXECUTABLE} -c "import torch; print(torch.utils.cmake_prefix_path)"
OUTPUT_VARIABLE TORCH_CMAKE_PATH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
message(STATUS "PYTHON_EXECUTABLE: ${PYTHON_EXECUTABLE}")
message(STATUS "TORCH_CMAKE_PATH: ${TORCH_CMAKE_PATH}")
list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PATH}")
find_package(Torch REQUIRED)
include_directories(${PYTHON_INCLUDE_DIRS})
# Warp Reduce library
add_library(warp_reduce SHARED
src/sgl-kernel/csrc/warp_reduce.cc
src/sgl-kernel/csrc/warp_reduce_kernel.cu
)
target_include_directories(warp_reduce PRIVATE
${CUDA_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
target_include_directories(warp_reduce
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/sgl-kernel/csrc
${CUDA_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
)
target_link_libraries(warp_reduce PRIVATE
${TORCH_LIBRARIES}
${PYTHON_LIBRARIES}
target_link_libraries(warp_reduce
PRIVATE
${TORCH_LIBRARIES}
Python3::Python
)
set_target_properties(warp_reduce PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
# TRT Reduce library
add_library(trt_reduce SHARED
src/sgl-kernel/csrc/trt_reduce.cc
src/sgl-kernel/csrc/trt_reduce_internal.cu
src/sgl-kernel/csrc/trt_reduce_kernel.cu
)
target_include_directories(trt_reduce
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/sgl-kernel/csrc
${CUDA_INCLUDE_DIRS}
${TORCH_INCLUDE_DIRS}
)
target_link_libraries(trt_reduce
PRIVATE
${TORCH_LIBRARIES}
Python3::Python
)
# Set common properties for both libraries
foreach(target warp_reduce trt_reduce)
set_target_properties(${target} PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON
PREFIX ""
SUFFIX ".so"
)
endforeach()