[sgl-kernel][1/N]Support Expert Specialization Grouped GEMM (#11432)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
Co-authored-by: PGFLMG <1106310035@qq.com>
Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
This commit is contained in:
Qi Yuhang
2025-10-13 11:19:21 +08:00
committed by GitHub
parent 8e776c78a1
commit 9a30914e94
11 changed files with 1473 additions and 0 deletions

View File

@@ -244,6 +244,7 @@ from sgl_kernel.elementwise import (
rmsnorm,
silu_and_mul,
)
from sgl_kernel.expert_specilization import es_fp8_blockwise_scaled_grouped_mm
from sgl_kernel.fused_moe import fused_marlin_moe
from sgl_kernel.gemm import (
awq_dequantize,

View File

@@ -0,0 +1,27 @@
import torch
def es_fp8_blockwise_scaled_grouped_mm(
    output,
    a,
    b,
    scales_a,
    scales_b,
    stride_a,
    stride_b,
    stride_d,
    problem_sizes,
    expert_offsets,
):
    """Invoke the ``sgl_kernel`` expert-specialization grouped-GEMM operator.

    This is a thin Python entry point: every argument is forwarded,
    positionally and in declaration order, to the ``default`` overload of
    ``torch.ops.sgl_kernel.es_fp8_blockwise_scaled_grouped_mm``. No
    validation or conversion is done here.

    Nothing is returned; whatever the operator call yields is discarded.

    NOTE(review): presumably the operator writes its result into ``output``
    in place and the remaining arguments describe per-expert operands,
    scales, strides, problem sizes and offsets -- confirm against the
    operator's registered schema before relying on this.
    """
    # Resolve the registered operator overload once, then forward the
    # arguments exactly as received.
    grouped_mm_op = torch.ops.sgl_kernel.es_fp8_blockwise_scaled_grouped_mm.default
    forwarded = (
        output,
        a,
        b,
        scales_a,
        scales_b,
        stride_a,
        stride_b,
        stride_d,
        problem_sizes,
        expert_offsets,
    )
    grouped_mm_op(*forwarded)