Blackwell Cutlass MLA kernel (#5142)
This commit is contained in:
@@ -33,7 +33,7 @@ include(FetchContent)
|
||||
# Fetch NVIDIA CUTLASS at a pinned commit so the build is reproducible.
# NOTE(review): this revision is presumably the one that provides the
# examples/77_blackwell_fmha headers added to the include path — confirm.
FetchContent_Declare(
  repo-cutlass
  GIT_REPOSITORY https://github.com/NVIDIA/cutlass
  GIT_TAG        df8a550d3917b0e97f416b2ed8c2d786f7f686a3
  # A shallow clone only works with branch or tag names; GIT_TAG is a raw
  # commit hash here, so shallow cloning stays disabled.
  GIT_SHALLOW    OFF
)
# Download/unpack only: sets repo-cutlass_SOURCE_DIR without adding the
# CUTLASS tree to this build via add_subdirectory().
FetchContent_Populate(repo-cutlass)
|
||||
@@ -76,6 +76,8 @@ include_directories(
|
||||
${PROJECT_SOURCE_DIR}/csrc
|
||||
${repo-cutlass_SOURCE_DIR}/include
|
||||
${repo-cutlass_SOURCE_DIR}/tools/util/include
|
||||
${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
|
||||
${repo-cutlass_SOURCE_DIR}/examples/common
|
||||
${repo-flashinfer_SOURCE_DIR}/include
|
||||
${repo-flashinfer_SOURCE_DIR}/csrc
|
||||
${repo-flash-attention_SOURCE_DIR}/hopper
|
||||
@@ -158,6 +160,7 @@ string(REPLACE "-D__CUDA_NO_HALF2_OPERATORS__" "" CMAKE_CUDA_FLAGS "${CMAKE
|
||||
|
||||
set(SOURCES
|
||||
"csrc/allreduce/custom_all_reduce.cu"
|
||||
"csrc/attention/cutlass_mla_kernel.cu"
|
||||
"csrc/attention/lightning_attention_decode_kernel.cu"
|
||||
"csrc/elementwise/activation.cu"
|
||||
"csrc/elementwise/fused_add_rms_norm_kernel.cu"
|
||||
|
||||
Reference in New Issue
Block a user