support lightning_attention_decode in sgl-kernel for MiniMax-Text-01 (#3030)
This commit is contained in:
@@ -100,6 +100,7 @@ ext_modules = [
 "src/sgl-kernel/csrc/moe_align_kernel.cu",
 "src/sgl-kernel/csrc/int8_gemm_kernel.cu",
 "src/sgl-kernel/csrc/sampling_scaling_penalties.cu",
+"src/sgl-kernel/csrc/lightning_attention_decode_kernel.cu",
 "src/sgl-kernel/csrc/sgl_kernel_ops.cu",
 "src/sgl-kernel/csrc/rotary_embedding.cu",
 "3rdparty/flashinfer/csrc/activation.cu",
Reference in New Issue
Block a user