Unify SGL Kernel Releases (#10701)
This commit is contained in:
@@ -239,14 +239,9 @@ if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
|
||||
"-gencode=arch=compute_101a,code=sm_101a"
|
||||
)
|
||||
endif()
|
||||
|
||||
else()
|
||||
list(APPEND SGL_KERNEL_CUDA_FLAGS
|
||||
"-use_fast_math"
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.4" OR SGL_KERNEL_ENABLE_SM90A)
|
||||
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.4")
|
||||
set(SGL_KERNEL_ENABLE_FA3 ON)
|
||||
list(APPEND SGL_KERNEL_CUDA_FLAGS
|
||||
"-gencode=arch=compute_90a,code=sm_90a"
|
||||
@@ -334,14 +329,47 @@ set(SOURCES
|
||||
"${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/flash_sparse_api.cpp"
|
||||
)
|
||||
|
||||
Python_add_library(common_ops MODULE USE_SABI ${SKBUILD_SABI_VERSION} WITH_SOABI ${SOURCES})
|
||||
# SM90 variant of the extension module (fast math optimization; same namespace,
# different directory). It is compiled from the shared ${SOURCES} list.
Python_add_library(
  common_ops_sm90_build
  MODULE
  USE_SABI ${SKBUILD_SABI_VERSION}
  WITH_SOABI
  ${SOURCES}
)
|
||||
|
||||
target_compile_options(common_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS}>)
|
||||
target_include_directories(common_ops PRIVATE
|
||||
# Configure the SM90 build: fast-math definition and compiler flag, private
# include paths, and a dedicated output directory.
target_compile_definitions(common_ops_sm90_build PRIVATE
  USE_FAST_MATH=1
)
# The generator expression is quoted and its items are ';'-separated so the
# whole expression is passed as a single argument. Unquoted, the space before
# -use_fast_math splits the expression into separate arguments.
target_compile_options(common_ops_sm90_build PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS};-use_fast_math>"
)
target_include_directories(common_ops_sm90_build PRIVATE
  ${PROJECT_SOURCE_DIR}/csrc
  ${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
  ${repo-cutlass_SOURCE_DIR}/examples/common
  ${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
)
# Set output name and separate build directory to avoid conflicts
set_target_properties(common_ops_sm90_build PROPERTIES
  OUTPUT_NAME "common_ops"
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/sm90"
)
|
||||
|
||||
# Build SM100+ library with precise math (same namespace, different directory)
Python_add_library(common_ops_sm100_build MODULE USE_SABI ${SKBUILD_SABI_VERSION} WITH_SOABI ${SOURCES})

target_compile_definitions(common_ops_sm100_build PRIVATE
  USE_FAST_MATH=0
)
# The generator expression is quoted so the ';'-separated flag list stays
# inside one argument instead of being split across several arguments.
# No -use_fast_math is added for this target.
target_compile_options(common_ops_sm100_build PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS}>"
)
target_include_directories(common_ops_sm100_build PRIVATE
  ${PROJECT_SOURCE_DIR}/csrc
  ${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
  ${repo-cutlass_SOURCE_DIR}/examples/common
  ${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
)
# Set output name and separate build directory to avoid conflicts
set_target_properties(common_ops_sm100_build PROPERTIES
  OUTPUT_NAME "common_ops"
  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/sm100"
)
|
||||
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
||||
execute_process(
|
||||
@@ -367,16 +395,26 @@ add_subdirectory(
|
||||
${repo-mscclpp_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/mscclpp-build
|
||||
)
|
||||
target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt mscclpp_static)
|
||||
# Both architecture-specific modules link against the same set of libraries.
foreach(sgl_kernel_arch_target IN ITEMS common_ops_sm90_build common_ops_sm100_build)
  target_link_libraries(${sgl_kernel_arch_target} PRIVATE
    ${TORCH_LIBRARIES}
    c10
    cuda
    cublas
    cublasLt
    mscclpp_static
  )
endforeach()
|
||||
|
||||
# flash attention
|
||||
target_compile_definitions(common_ops PRIVATE
|
||||
# Apply the same FLASHATTENTION_DISABLE_* definitions to both
# architecture-specific modules.
foreach(sgl_kernel_fa_target IN ITEMS common_ops_sm90_build common_ops_sm100_build)
  target_compile_definitions(${sgl_kernel_fa_target} PRIVATE
    FLASHATTENTION_DISABLE_BACKWARD
    FLASHATTENTION_DISABLE_DROPOUT
    FLASHATTENTION_DISABLE_UNEVEN_K
  )
endforeach()
|
||||
|
||||
install(TARGETS common_ops LIBRARY DESTINATION sgl_kernel)
|
||||
# Install each architecture-specific module into its own subdirectory of
# sgl_kernel (sgl_kernel/sm90 and sgl_kernel/sm100). install(TARGETS) picks
# up each built library from that target's LIBRARY_OUTPUT_DIRECTORY.
foreach(sgl_kernel_arch IN ITEMS sm90 sm100)
  install(
    TARGETS common_ops_${sgl_kernel_arch}_build
    LIBRARY DESTINATION sgl_kernel/${sgl_kernel_arch}
  )
endforeach()
|
||||
|
||||
# ============================ Optional Install ============================= #
|
||||
# set flash-attention sources file
|
||||
|
||||
Reference in New Issue
Block a user