diff --git a/sgl-kernel/CMakeLists.txt b/sgl-kernel/CMakeLists.txt deleted file mode 100644 index 623984f2f..000000000 --- a/sgl-kernel/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ -cmake_minimum_required(VERSION 3.18) -project(sgl-kernel LANGUAGES CXX CUDA) - -# Basic settings -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_STANDARD 17) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) - -set(CUTLASS_DIR "3rdparty/cutlass") -set(CUB_DIR "3rdparty/cub") - -# Set CUDA architectures -set(CMAKE_CUDA_ARCHITECTURES "75;80;86;89;90") -message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") - -find_package(Python3 COMPONENTS Interpreter Development REQUIRED) - -# Find PyTorch -execute_process( - COMMAND ${Python3_EXECUTABLE} -c "import torch; print(torch.utils.cmake_prefix_path)" - OUTPUT_VARIABLE TORCH_CMAKE_PATH - OUTPUT_STRIP_TRAILING_WHITESPACE -) -list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PATH}") - -find_package(Torch REQUIRED) - -# Warp Reduce library -add_library(_kernels SHARED - src/sgl-kernel/csrc/trt_reduce_internal.cu - src/sgl-kernel/csrc/trt_reduce_kernel.cu - src/sgl-kernel/csrc/moe_align_kernel.cu - src/sgl-kernel/csrc/int8_gemm_kernel.cu - src/sgl-kernel/csrc/sampling_scaling_penalties.cu - src/sgl-kernel/csrc/sgl_kernel_ops.cu -) - -target_include_directories(_kernels - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src/sgl-kernel/csrc - ${CUDA_INCLUDE_DIRS} - ${TORCH_INCLUDE_DIRS} - ${CUTLASS_DIR}/include - ${CUTLASS_DIR}/tools/util/include - ${CUB_DIR}/cub -) - -target_link_libraries(_kernels - PRIVATE - ${TORCH_LIBRARIES} - Python3::Python -) - -# Set common properties for both libraries -foreach(target _kernels) - set_target_properties(${target} PROPERTIES - CUDA_SEPARABLE_COMPILATION ON - POSITION_INDEPENDENT_CODE ON - CUDA_RESOLVE_DEVICE_SYMBOLS ON - PREFIX "" - SUFFIX ".so" - ) -endforeach() diff --git a/sgl-kernel/Makefile b/sgl-kernel/Makefile index 7a041b1ed..fac4c5c56 100644 --- a/sgl-kernel/Makefile +++ b/sgl-kernel/Makefile @@ -1,15 +1,18 @@ -.PHONY: tree ln install build clean test format +.PHONY: tree ln submodule install build clean test format tree: @tree --prune -I "__pycache__|*.egg-info|*.so|build" -ln: - @rm -rf build && cmake . -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DCMAKE_CUDA_COMPILER=nvcc -B build && rm -rf compile_commands.json && ln -s build/compile_commands.json compile_commands.json +submodule: + @git submodule update --init --recursive -install: +ln: submodule + @rm -rf build && bear python3 setup.py build + +install: submodule @pip install -e . -build: +build: submodule @export MAX_JOBS=$(nproc) && python3 setup.py bdist_wheel clean: