[1/2] Add FP8 Blockscale MoE CUTLASS kernel for Blackwell (#5281)

This commit is contained in:
Elfie Guo
2025-04-22 22:28:20 -07:00
committed by GitHub
parent 71d1785f2d
commit e62c49557d
8 changed files with 732 additions and 1 deletion

30
sgl-kernel/python/sgl_kernel/moe.py Normal file → Executable file
View File

@@ -60,3 +60,33 @@ def moe_fused_gate(
n_share_experts_fusion,
routed_scaling_factor,
)
def fp8_blockwise_scaled_grouped_mm(
    output,
    a,
    b,
    scales_a,
    scales_b,
    stride_a,
    stride_b,
    stride_c,
    layout_sfa,
    layout_sfb,
    problem_sizes,
    expert_offsets,
):
    """FP8 blockwise-scaled grouped matrix multiply (MoE expert GEMM).

    Thin Python binding: every argument is forwarded, unchanged and in
    order, to the compiled custom op
    ``torch.ops.sgl_kernel.fp8_blockwise_scaled_grouped_mm.default``
    (registered by the sgl-kernel extension). The op returns nothing
    here; presumably results are written into ``output`` in place —
    TODO(review): confirm against the kernel's C++ registration.

    Args:
        output: destination buffer for the grouped GEMM results.
        a, b: FP8 operand tensors for the grouped multiply.
        scales_a, scales_b: blockwise dequantization scale factors for
            ``a`` and ``b`` respectively.
        stride_a, stride_b, stride_c: per-group stride descriptors for
            the operands and output.
        layout_sfa, layout_sfb: layout descriptors for the scale factors.
        problem_sizes: per-expert GEMM problem dimensions.
        expert_offsets: offsets delimiting each expert's rows in the
            grouped batch.
    """
    # Resolve the op once, then dispatch; all arguments pass through verbatim.
    kernel = torch.ops.sgl_kernel.fp8_blockwise_scaled_grouped_mm.default
    kernel(
        output,
        a,
        b,
        scales_a,
        scales_b,
        stride_a,
        stride_b,
        stride_c,
        layout_sfa,
        layout_sfb,
        problem_sizes,
        expert_offsets,
    )