Reorganize C++ source files in sgl-kernel into multiple folders (#4025)

This commit is contained in:
Lianmin Zheng
2025-03-03 05:32:30 -08:00
committed by GitHub
parent a7000a7650
commit 6b45a21d16
20 changed files with 203 additions and 210 deletions

View File

@@ -80,6 +80,12 @@ nvcc_flags = [
"-std=c++17",
"-use_fast_math",
"-DFLASHINFER_ENABLE_F16",
"-DCUTLASS_VERSIONS_GENERATED",
"-DCUTE_USE_PACKED_TUPLE=1",
"-DCUTLASS_TEST_LEVEL=0",
"-DCUTLASS_TEST_ENABLE_CACHED_RESULTS=1",
"-DCUTLASS_DEBUG_TRACE_LEVEL=0",
"--ptxas-options=-v",
"-Xcompiler=-Wconversion",
"-Xcompiler=-fno-strict-aliasing",
]
@@ -91,18 +97,18 @@ nvcc_flags_fp8 = [
sources = [
"src/sgl-kernel/torch_extension.cc",
"src/sgl-kernel/csrc/trt_reduce_internal.cu",
"src/sgl-kernel/csrc/trt_reduce_kernel.cu",
"src/sgl-kernel/csrc/moe_align_kernel.cu",
"src/sgl-kernel/csrc/int8_gemm_kernel.cu",
"src/sgl-kernel/csrc/fp8_gemm_kernel.cu",
"src/sgl-kernel/csrc/fp8_blockwise_gemm_kernel.cu",
"src/sgl-kernel/csrc/activation/fused_add_rms_norm_kernel.cu",
"src/sgl-kernel/csrc/allreduce/trt_reduce_internal.cu",
"src/sgl-kernel/csrc/allreduce/trt_reduce_kernel.cu",
"src/sgl-kernel/csrc/gemm/cublas_grouped_gemm.cu",
"src/sgl-kernel/csrc/gemm/fp8_gemm_kernel.cu",
"src/sgl-kernel/csrc/gemm/fp8_blockwise_gemm_kernel.cu",
"src/sgl-kernel/csrc/gemm/int8_gemm_kernel.cu",
"src/sgl-kernel/csrc/gemm/per_token_group_quant_fp8.cu",
"src/sgl-kernel/csrc/moe/moe_align_kernel.cu",
"src/sgl-kernel/csrc/speculative/eagle_utils.cu",
"src/sgl-kernel/csrc/speculative/speculative_sampling.cu",
"src/sgl-kernel/csrc/lightning_attention_decode_kernel.cu",
"src/sgl-kernel/csrc/fused_add_rms_norm_kernel.cu",
"src/sgl-kernel/csrc/eagle_utils.cu",
"src/sgl-kernel/csrc/speculative_sampling.cu",
"src/sgl-kernel/csrc/per_token_group_quant_fp8.cu",
"src/sgl-kernel/csrc/cublas_grouped_gemm.cu",
"3rdparty/flashinfer/csrc/activation.cu",
"3rdparty/flashinfer/csrc/bmm_fp8.cu",
"3rdparty/flashinfer/csrc/norm.cu",