Move rope and bmm into sgl-kernel (#4241)
This commit is contained in:
@@ -97,6 +97,8 @@ sources = [
|
||||
"csrc/allreduce/trt_reduce_kernel.cu",
|
||||
"csrc/attention/lightning_attention_decode_kernel.cu",
|
||||
"csrc/elementwise/fused_add_rms_norm_kernel.cu",
|
||||
"csrc/elementwise/rope.cu",
|
||||
"csrc/gemm/bmm_fp8.cu",
|
||||
"csrc/gemm/cublas_grouped_gemm.cu",
|
||||
"csrc/gemm/fp8_gemm_kernel.cu",
|
||||
"csrc/gemm/fp8_blockwise_gemm_kernel.cu",
|
||||
@@ -109,11 +111,9 @@ sources = [
|
||||
"csrc/speculative/speculative_sampling.cu",
|
||||
"csrc/torch_extension.cc",
|
||||
"3rdparty/flashinfer/csrc/activation.cu",
|
||||
"3rdparty/flashinfer/csrc/bmm_fp8.cu",
|
||||
"3rdparty/flashinfer/csrc/norm.cu",
|
||||
"3rdparty/flashinfer/csrc/sampling.cu",
|
||||
"3rdparty/flashinfer/csrc/renorm.cu",
|
||||
"3rdparty/flashinfer/csrc/rope.cu",
|
||||
"3rdparty/flashinfer/csrc/sampling.cu",
|
||||
]
|
||||
|
||||
enable_bf16 = os.getenv("SGL_KERNEL_ENABLE_BF16", "0") == "1"
|
||||
|
||||
Reference in New Issue
Block a user