Support FP4 gemm (1/2) (#3899)
This commit is contained in:
@@ -26,9 +26,11 @@ from sgl_kernel.gemm import (
|
||||
awq_dequantize,
|
||||
bmm_fp8,
|
||||
cublas_grouped_gemm,
|
||||
cutlass_scaled_fp4_mm,
|
||||
fp8_blockwise_scaled_mm,
|
||||
fp8_scaled_mm,
|
||||
int8_scaled_mm,
|
||||
scaled_fp4_quant,
|
||||
sgl_per_tensor_quant_fp8,
|
||||
sgl_per_token_group_quant_fp8,
|
||||
sgl_per_token_group_quant_int8,
|
||||
|
||||
Reference in New Issue
Block a user