support lightning_attention_decode in sgl-kernel for MiniMax-Text-01 (#3030)

This commit is contained in:
Xiaoyu Zhang
2025-01-23 15:29:20 +08:00
committed by GitHub
parent 3e032c07cc
commit ac2dc35d0e
8 changed files with 588 additions and 8 deletions

View File

@@ -100,6 +100,7 @@ ext_modules = [
 "src/sgl-kernel/csrc/moe_align_kernel.cu",
 "src/sgl-kernel/csrc/int8_gemm_kernel.cu",
 "src/sgl-kernel/csrc/sampling_scaling_penalties.cu",
+"src/sgl-kernel/csrc/lightning_attention_decode_kernel.cu",
 "src/sgl-kernel/csrc/sgl_kernel_ops.cu",
 "src/sgl-kernel/csrc/rotary_embedding.cu",
 "3rdparty/flashinfer/csrc/activation.cu",