[sgl-kernel][1/N]Support Expert Specialization Grouped GEMM (#11432)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
Co-authored-by: PGFLMG <1106310035@qq.com>
Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
This commit is contained in:
Qi Yuhang
2025-10-13 11:19:21 +08:00
committed by GitHub
parent 8e776c78a1
commit 9a30914e94
11 changed files with 1473 additions and 0 deletions

View File

@@ -531,6 +531,14 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"bool silu_activation,"
"int pad_slot_id) -> ()");
m.impl("causal_conv1d_fwd", torch::kCUDA, &causal_conv1d_fwd);
/*
* From csrc/expert_sepcialization
*/
m.def(
"es_fp8_blockwise_scaled_grouped_mm(Tensor output, Tensor a, Tensor b, Tensor scales_a, Tensor scales_b, Tensor "
"stride_a, Tensor stride_b, Tensor stride_d, Tensor problem_sizes, Tensor expert_offsets) -> ()");
m.impl("es_fp8_blockwise_scaled_grouped_mm", &es_fp8_blockwise_scaled_grouped_mm);
}
REGISTER_EXTENSION(common_ops)