feat: support fa cute in sgl-kernel (#10205)

Co-authored-by: cicirori <32845984+cicirori@users.noreply.github.com>
This commit is contained in:
Yineng Zhang
2025-09-09 00:14:39 -07:00
committed by GitHub
parent d1d4074c4e
commit 94fb4e9e54
5 changed files with 1315 additions and 0 deletions

View File

@@ -95,6 +95,15 @@ FetchContent_Declare(
)
FetchContent_Populate(repo-flash-attention)
# flash-attention origin: fetch the upstream Dao-AILab flash-attention
# repository (distinct from the forked repo-flash-attention fetched above),
# pinned to a specific commit so the installed flash_attn/cute sources are
# reproducible.
# NOTE(review): GIT_SHALLOW OFF is deliberate — the GIT_TAG is a raw commit
# SHA, and shallow fetches of arbitrary SHAs are not reliably supported by
# all git servers.
FetchContent_Declare(
repo-flash-attention-origin
GIT_REPOSITORY https://github.com/Dao-AILab/flash-attention.git
GIT_TAG 203b9b3dba39d5d08dffb49c09aa622984dff07d
GIT_SHALLOW OFF
)
# Download at configure time only; no targets are built from this checkout —
# its sources are consumed by the install(DIRECTORY ...) rule further below.
FetchContent_Populate(repo-flash-attention-origin)
# mscclpp
FetchContent_Declare(
repo-mscclpp
@@ -512,3 +521,13 @@ install(DIRECTORY "${repo-triton_SOURCE_DIR}/python/triton_kernels/triton_kernel
DESTINATION "triton_kernels"
PATTERN ".git*" EXCLUDE
PATTERN "__pycache__" EXCLUDE)
# flash attention 4: ship the flash_attn/cute Python sources from the
# upstream flash-attention checkout populated earlier
# (repo-flash-attention-origin).
# TODO: find a better install condition.
# NOTE(review): the CUDA >= 12.8 / SM100A gate is a heuristic for "FA cute is
# usable here"; SGL_KERNEL_ENABLE_SM100A is tested for truthiness, so any
# non-false value enables the install — confirm that is the intended contract.
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
  # flash_attn/cute — trailing slash on the source path copies the directory
  # CONTENTS into DESTINATION "flash_attn/cute", excluding git metadata and
  # Python bytecode caches.
  install(DIRECTORY "${repo-flash-attention-origin_SOURCE_DIR}/flash_attn/cute/"
DESTINATION "flash_attn/cute"
PATTERN ".git*" EXCLUDE
PATTERN "__pycache__" EXCLUDE)
endif()