Support FP4 gemm (1/2) (#3899)

This commit is contained in:
Trevor Morris
2025-03-24 19:50:23 -07:00
committed by GitHub
parent 22c3702e1e
commit e9f8e42318
11 changed files with 1245 additions and 5 deletions

View File

@@ -26,9 +26,11 @@ from sgl_kernel.gemm import (
awq_dequantize,
bmm_fp8,
cublas_grouped_gemm,
cutlass_scaled_fp4_mm,
fp8_blockwise_scaled_mm,
fp8_scaled_mm,
int8_scaled_mm,
scaled_fp4_quant,
sgl_per_tensor_quant_fp8,
sgl_per_token_group_quant_fp8,
sgl_per_token_group_quant_int8,