Add awq dequantize kernel to sgl with 1x to 3x speedup (#4104)

2025-03-12 00:10:02 -07:00
parent e0917e6bd0
commit 07f944631e
8 changed files with 324 additions and 0 deletions
--- a/sgl-kernel/include/sgl_kernel_ops.h
+++ b/sgl-kernel/include/sgl_kernel_ops.h
@@ -112,6 +112,7 @@ void apply_rope_pos_ids_cos_sin_cache(
 /*
 * From csrc/gemm
 */
+torch::Tensor awq_dequantize(torch::Tensor qweight, torch::Tensor scales, torch::Tensor qzeros);
 torch::Tensor int8_scaled_mm(
    const torch::Tensor& mat_a,
    const torch::Tensor& mat_b,