Add awq dequantize kernel to sgl with 1x to 3x speedup (#4104)

This commit is contained in:
Rex
2025-03-12 00:10:02 -07:00
committed by GitHub
parent e0917e6bd0
commit 07f944631e
8 changed files with 324 additions and 0 deletions

View File

@@ -150,6 +150,7 @@ sources = [
"csrc/elementwise/rope.cu",
"csrc/gemm/bmm_fp8.cu",
"csrc/gemm/cublas_grouped_gemm.cu",
"csrc/gemm/awq_kernel.cu",
"csrc/gemm/fp8_gemm_kernel.cu",
"csrc/gemm/fp8_blockwise_gemm_kernel.cu",
"csrc/gemm/int8_gemm_kernel.cu",