62 lines
1.6 KiB
CMake
62 lines
1.6 KiB
CMake
cmake_minimum_required(VERSION 3.18)

# Default CUDA architectures to build for.
# This has to be decided before project() enables the CUDA language: once
# CUDA is enabled CMake always defines CMAKE_CUDA_ARCHITECTURES itself, so a
# later check could no longer tell a user-supplied value from CMake's own
# default. Guarding with NOT DEFINED keeps a value passed on the command line
# (-DCMAKE_CUDA_ARCHITECTURES=...) instead of silently overriding it.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "75;80;86;89;90")
endif()

project(sgl-kernel LANGUAGES CXX CUDA)

# Basic settings
# Emit compile_commands.json for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Require C++17 for both host (CXX) and device (CUDA) translation units.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Location of the CUTLASS checkout, anchored to this source directory so the
# path stays valid no matter which command consumes it.
set(CUTLASS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/cutlass")

# Report the architectures actually in effect (default or user-provided).
message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
|
|
|
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)

# Find PyTorch
# Ask the Python interpreter located above to import torch and print
# torch.utils.cmake_prefix_path; the printed path is appended to
# CMAKE_PREFIX_PATH so that find_package(Torch) can search it.
execute_process(
  COMMAND "${Python3_EXECUTABLE}" -c "import torch; print(torch.utils.cmake_prefix_path)"
  RESULT_VARIABLE TORCH_CMAKE_PATH_RESULT
  OUTPUT_VARIABLE TORCH_CMAKE_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Stop here with a clear message if the probe failed (e.g. torch cannot be
# imported), instead of appending an empty path and failing later with a
# generic "could not find Torch" error. stderr is not captured, so the Python
# traceback is still shown above this message.
if(NOT TORCH_CMAKE_PATH_RESULT EQUAL 0)
  message(FATAL_ERROR
    "Failed to query torch.utils.cmake_prefix_path using "
    "${Python3_EXECUTABLE} (result: ${TORCH_CMAKE_PATH_RESULT}). "
    "Make sure PyTorch is installed for this Python interpreter.")
endif()

list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PATH}")

find_package(Torch REQUIRED)
|
|
|
|
# _kernels: shared library built from the CUDA sources under
# src/sgl-kernel/csrc. The target is declared first and its sources are
# attached afterwards; paths are relative to this source directory.
add_library(_kernels SHARED)

target_sources(_kernels
  PRIVATE
    src/sgl-kernel/csrc/trt_reduce_internal.cu
    src/sgl-kernel/csrc/trt_reduce_kernel.cu
    src/sgl-kernel/csrc/moe_align_kernel.cu
    src/sgl-kernel/csrc/sgl_kernel_ops.cu
)
|
|
|
|
# Header search paths used only when compiling _kernels itself (PRIVATE).
# The calls append in order, so the resulting search order is:
# project sources, CUDA, Torch, CUTLASS.

# Headers that live next to the kernel sources.
target_include_directories(_kernels PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/src/sgl-kernel/csrc"
)

# CUDA and Torch headers.
# NOTE(review): neither variable is set in this file; presumably both come
# from the Torch package found earlier -- confirm.
target_include_directories(_kernels PRIVATE
  ${CUDA_INCLUDE_DIRS}
  ${TORCH_INCLUDE_DIRS}
)

# CUTLASS headers and its utility headers.
target_include_directories(_kernels PRIVATE
  "${CUTLASS_DIR}/include"
  "${CUTLASS_DIR}/tools/util/include"
)
|
|
|
|
# Link _kernels against the Torch libraries and the Python library; PRIVATE
# because these are needed to build the target and are not propagated to
# anything that links against it.
target_link_libraries(_kernels PRIVATE ${TORCH_LIBRARIES} Python3::Python)
|
|
|
|
# Build properties for the _kernels target.
set_target_properties(_kernels PROPERTIES
  # Position-independent code.
  POSITION_INDEPENDENT_CODE ON
  # Compile CUDA device code separately and resolve its device symbols on
  # this target.
  CUDA_SEPARABLE_COMPILATION ON
  CUDA_RESOLVE_DEVICE_SYMBOLS ON
  # Output file name is exactly "_kernels.so": no "lib" prefix, fixed suffix.
  PREFIX ""
  SUFFIX ".so"
)
|