Move rope and bmm into sgl-kernel (#4241)

This commit is contained in:
Lianmin Zheng
2025-03-09 18:38:15 -07:00
committed by GitHub
parent 9dfafa743c
commit eb06dbcbf8
5 changed files with 183 additions and 13 deletions

View File

@@ -97,6 +97,8 @@ sources = [
     "csrc/allreduce/trt_reduce_kernel.cu",
     "csrc/attention/lightning_attention_decode_kernel.cu",
     "csrc/elementwise/fused_add_rms_norm_kernel.cu",
+    "csrc/elementwise/rope.cu",
+    "csrc/gemm/bmm_fp8.cu",
     "csrc/gemm/cublas_grouped_gemm.cu",
     "csrc/gemm/fp8_gemm_kernel.cu",
     "csrc/gemm/fp8_blockwise_gemm_kernel.cu",
@@ -109,11 +111,9 @@ sources = [
     "csrc/speculative/speculative_sampling.cu",
     "csrc/torch_extension.cc",
     "3rdparty/flashinfer/csrc/activation.cu",
-    "3rdparty/flashinfer/csrc/bmm_fp8.cu",
     "3rdparty/flashinfer/csrc/norm.cu",
     "3rdparty/flashinfer/csrc/sampling.cu",
     "3rdparty/flashinfer/csrc/renorm.cu",
-    "3rdparty/flashinfer/csrc/rope.cu",
     "3rdparty/flashinfer/csrc/sampling.cu",
 ]
 enable_bf16 = os.getenv("SGL_KERNEL_ENABLE_BF16", "0") == "1"