CUDA: faster tile FA, add oob checks, more HSs (#16492)

This commit is contained in:
Johannes Gäßler
2025-10-11 20:54:32 +02:00
committed by GitHub
parent a3cb04744f
commit 11f0af5504
18 changed files with 1358 additions and 784 deletions

View File

@@ -30,6 +30,8 @@ if (MUSAToolkit_FOUND)
# Add the mudnn.cuh header from ../ggml-musa to the MUSA header list.
list(APPEND GGML_HEADERS_MUSA "../ggml-musa/mudnn.cuh")
# (Re)initialise the MUSA source list with every .cu file directly under ../ggml-cuda.
# NOTE(review): these globs do not use CONFIGURE_DEPENDS, so newly added files
# are presumably only picked up after CMake is re-run — confirm this is intended.
file(GLOB GGML_SOURCES_MUSA "../ggml-cuda/*.cu")
# Append the fattn-tile template-instance sources. SRCS is a scratch variable
# that every subsequent file(GLOB SRCS ...) call overwrites.
file(GLOB SRCS "../ggml-cuda/template-instances/fattn-tile*.cu")
list(APPEND GGML_SOURCES_MUSA ${SRCS})
# Append the fattn-mma template-instance sources.
file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu")
list(APPEND GGML_SOURCES_MUSA ${SRCS})
# Collect the mmq template-instance sources into SRCS; the statement that
# consumes this result lies beyond the end of this excerpt.
file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")