Support W8A8 FP8 kernel with CUTLASS (#3047)

Co-authored-by: yych0745 <1398089567@qq.com>
This commit is contained in:
HandH1998
2025-01-26 15:46:51 +08:00
committed by GitHub
parent 95f789adb0
commit 82392da830
8 changed files with 881 additions and 0 deletions

View File

@@ -56,6 +56,7 @@ include_dirs = [
turbomind.resolve(),
turbomind.resolve() / "src",
]
nvcc_flags = [
"-DNDEBUG",
f"-DOPERATOR_NAMESPACE={operator_namespace}",
@@ -82,6 +83,7 @@ sources = [
"src/sgl-kernel/csrc/trt_reduce_kernel.cu",
"src/sgl-kernel/csrc/moe_align_kernel.cu",
"src/sgl-kernel/csrc/int8_gemm_kernel.cu",
"src/sgl-kernel/csrc/fp8_gemm_kernel.cu",
"src/sgl-kernel/csrc/lightning_attention_decode_kernel.cu",
"src/sgl-kernel/csrc/rotary_embedding.cu",
"3rdparty/flashinfer/csrc/activation.cu",