Move rope and bmm into sgl-kernel (#4241)

This commit is contained in:
Lianmin Zheng
2025-03-09 18:38:15 -07:00
committed by GitHub
parent 9dfafa743c
commit eb06dbcbf8
5 changed files with 183 additions and 13 deletions

View File

@@ -97,6 +97,8 @@ sources = [
     "csrc/allreduce/trt_reduce_kernel.cu",
     "csrc/attention/lightning_attention_decode_kernel.cu",
     "csrc/elementwise/fused_add_rms_norm_kernel.cu",
+    "csrc/elementwise/rope.cu",
+    "csrc/gemm/bmm_fp8.cu",
     "csrc/gemm/cublas_grouped_gemm.cu",
     "csrc/gemm/fp8_gemm_kernel.cu",
     "csrc/gemm/fp8_blockwise_gemm_kernel.cu",
@@ -109,11 +111,9 @@ sources = [
     "csrc/speculative/speculative_sampling.cu",
     "csrc/torch_extension.cc",
     "3rdparty/flashinfer/csrc/activation.cu",
-    "3rdparty/flashinfer/csrc/bmm_fp8.cu",
     "3rdparty/flashinfer/csrc/norm.cu",
     "3rdparty/flashinfer/csrc/sampling.cu",
     "3rdparty/flashinfer/csrc/renorm.cu",
-    "3rdparty/flashinfer/csrc/rope.cu",
     "3rdparty/flashinfer/csrc/sampling.cu",
 ]
 enable_bf16 = os.getenv("SGL_KERNEL_ENABLE_BF16", "0") == "1"