[sgl-kernel][1/N]Support Expert Specialization Grouped GEMM (#11432)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com> Co-authored-by: PGFLMG <1106310035@qq.com> Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
2025-10-13 11:19:21 +08:00
parent 8e776c78a1
commit 9a30914e94
11 changed files with 1473 additions and 0 deletions
--- a/sgl-kernel/csrc/common_extension.cc
+++ b/sgl-kernel/csrc/common_extension.cc
@@ -531,6 +531,14 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
      "bool silu_activation,"
      "int pad_slot_id) -> ()");
  m.impl("causal_conv1d_fwd", torch::kCUDA, &causal_conv1d_fwd);
+
+  /*
+   * From csrc/expert_specialization
+   */
+  m.def(
+      "es_fp8_blockwise_scaled_grouped_mm(Tensor output, Tensor a, Tensor b, Tensor scales_a, Tensor scales_b, Tensor "
+      "stride_a, Tensor stride_b, Tensor stride_d, Tensor problem_sizes, Tensor expert_offsets) -> ()");
+  m.impl("es_fp8_blockwise_scaled_grouped_mm", &es_fp8_blockwise_scaled_grouped_mm);
 }

 REGISTER_EXTENSION(common_ops)