feat: support fa cute in sgl-kernel (#10205)

Co-authored-by: cicirori <32845984+cicirori@users.noreply.github.com>
This commit is contained in:
Yineng Zhang
2025-09-09 00:14:39 -07:00
committed by GitHub
parent d1d4074c4e
commit 94fb4e9e54
5 changed files with 1315 additions and 0 deletions

View File

@@ -95,6 +95,15 @@ FetchContent_Declare(
)
FetchContent_Populate(repo-flash-attention)
# flash-attention origin: fetch the upstream Dao-AILab flash-attention
# repository (distinct from the forked repo-flash-attention fetched above),
# pinned to a specific commit so the installed flash_attn/cute sources are
# reproducible.
# NOTE(review): GIT_SHALLOW OFF is deliberate — the GIT_TAG is a raw commit
# SHA, and shallow fetches of arbitrary SHAs are not reliably supported by
# all git servers.
FetchContent_Declare(
repo-flash-attention-origin
GIT_REPOSITORY https://github.com/Dao-AILab/flash-attention.git
GIT_TAG 203b9b3dba39d5d08dffb49c09aa622984dff07d
GIT_SHALLOW OFF
)
# Download at configure time only; no targets are built from this checkout —
# its sources are consumed by the install(DIRECTORY ...) rule further below.
FetchContent_Populate(repo-flash-attention-origin)
# mscclpp
FetchContent_Declare(
repo-mscclpp
@@ -512,3 +521,13 @@ install(DIRECTORY "${repo-triton_SOURCE_DIR}/python/triton_kernels/triton_kernel
DESTINATION "triton_kernels"
PATTERN ".git*" EXCLUDE
PATTERN "__pycache__" EXCLUDE)
# flash attention 4: ship the flash_attn/cute Python sources from the
# upstream flash-attention checkout populated earlier
# (repo-flash-attention-origin).
# TODO: find a better install condition.
# NOTE(review): the CUDA >= 12.8 / SM100A gate is a heuristic for "FA cute is
# usable here"; SGL_KERNEL_ENABLE_SM100A is tested for truthiness, so any
# non-false value enables the install — confirm that is the intended contract.
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
  # flash_attn/cute — trailing slash on the source path copies the directory
  # CONTENTS into DESTINATION "flash_attn/cute", excluding git metadata and
  # Python bytecode caches.
  install(DIRECTORY "${repo-flash-attention-origin_SOURCE_DIR}/flash_attn/cute/"
DESTINATION "flash_attn/cute"
PATTERN ".git*" EXCLUDE
PATTERN "__pycache__" EXCLUDE)
endif()