[1/2] Support Qserve (#6457)

Co-authored-by: yych0745 <1398089567@qq.com>
Co-authored-by: sleepcoo <sleepcoo@gmail.com>
This commit is contained in:
HandH1998
2025-05-22 10:48:59 +08:00
committed by GitHub
parent 6ce0ed073b
commit 4d643f6c7a
10 changed files with 2086 additions and 0 deletions

View File

@@ -36,6 +36,8 @@ from sgl_kernel.gemm import (
fp8_blockwise_scaled_mm,
fp8_scaled_mm,
int8_scaled_mm,
qserve_w4a8_per_chn_gemm,
qserve_w4a8_per_group_gemm,
scaled_fp4_quant,
sgl_per_tensor_quant_fp8,
sgl_per_token_group_quant_fp8,