Add awq dequantize kernel to sgl with 1x to 3x speedup (#4104)

This commit is contained in:
Rex
2025-03-12 00:10:02 -07:00
committed by GitHub
parent e0917e6bd0
commit 07f944631e
8 changed files with 324 additions and 0 deletions

View File

@@ -112,6 +112,7 @@ void apply_rope_pos_ids_cos_sin_cache(
/*
* From csrc/gemm
*/
// Declaration of the AWQ dequantize op. Takes three tensors by value
// (qweight, scales, qzeros) and returns a new torch::Tensor.
// NOTE(review): presumably qweight holds the packed quantized weights and
// scales/qzeros the quantization scales and zero points — the expected shapes,
// dtypes and device are not visible from this declaration; confirm against the
// kernel implementation.
torch::Tensor awq_dequantize(torch::Tensor qweight, torch::Tensor scales, torch::Tensor qzeros);
torch::Tensor int8_scaled_mm(
const torch::Tensor& mat_a,
const torch::Tensor& mat_b,