Add awq dequantize kernel to sgl with 1x to 3x speedup (#4104)
This commit is contained in:
@@ -150,6 +150,7 @@ sources = [
|
||||
"csrc/elementwise/rope.cu",
|
||||
"csrc/gemm/bmm_fp8.cu",
|
||||
"csrc/gemm/cublas_grouped_gemm.cu",
|
||||
"csrc/gemm/awq_kernel.cu",
|
||||
"csrc/gemm/fp8_gemm_kernel.cu",
|
||||
"csrc/gemm/fp8_blockwise_gemm_kernel.cu",
|
||||
"csrc/gemm/int8_gemm_kernel.cu",
|
||||
|
||||
Reference in New Issue
Block a user