Add awq dequantize kernel to sgl with 1x to 3x speedup (#4104)

This commit is contained in:
Rex
2025-03-12 00:10:02 -07:00
committed by GitHub
parent e0917e6bd0
commit 07f944631e
8 changed files with 324 additions and 0 deletions

View File

@@ -23,6 +23,7 @@ from sgl_kernel.elementwise import (
silu_and_mul,
)
from sgl_kernel.gemm import (
awq_dequantize,
bmm_fp8,
cublas_grouped_gemm,
fp8_blockwise_scaled_mm,