Blackwell Cutlass MLA kernel (#5142)

This commit is contained in:
Trevor Morris
2025-04-11 22:16:51 -07:00
committed by GitHub
parent 5ad0571903
commit f65b8d5c89
7 changed files with 371 additions and 3 deletions

View File

@@ -33,7 +33,7 @@ include(FetchContent)
# Declare the NVIDIA CUTLASS dependency, fetched from GitHub at a pinned commit.
FetchContent_Declare(
repo-cutlass
GIT_REPOSITORY https://github.com/NVIDIA/cutlass
# NOTE(review): two GIT_TAG entries are shown because this is a diff view whose
# +/- markers were stripped. The hunk header (-33,7 +33,7) implies exactly one
# removed and one added line, so the first hash is presumably the previous pin
# and the second the new pin -- confirm against the actual CMakeLists.txt.
GIT_TAG 6f4921858b3bb0a82d7cbeb4e499690e9ae60d16
GIT_TAG df8a550d3917b0e97f416b2ed8c2d786f7f686a3
# Shallow cloning is explicitly disabled for this dependency.
GIT_SHALLOW OFF
)
# Populate the content declared above; later include paths in this file
# reference ${repo-cutlass_SOURCE_DIR}.
FetchContent_Populate(repo-cutlass)
@@ -76,6 +76,8 @@ include_directories(
${PROJECT_SOURCE_DIR}/csrc
${repo-cutlass_SOURCE_DIR}/include
${repo-cutlass_SOURCE_DIR}/tools/util/include
${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
${repo-cutlass_SOURCE_DIR}/examples/common
${repo-flashinfer_SOURCE_DIR}/include
${repo-flashinfer_SOURCE_DIR}/csrc
${repo-flash-attention_SOURCE_DIR}/hopper
@@ -158,6 +160,7 @@ string(REPLACE "-D__CUDA_NO_HALF2_OPERATORS__" "" CMAKE_CUDA_FLAGS "${CMAKE
set(SOURCES
"csrc/allreduce/custom_all_reduce.cu"
"csrc/attention/cutlass_mla_kernel.cu"
"csrc/attention/lightning_attention_decode_kernel.cu"
"csrc/elementwise/activation.cu"
"csrc/elementwise/fused_add_rms_norm_kernel.cu"