Add DeepSeek-style fused MoE group gate selection kernel (#4530)

2025-03-29 11:51:45 -07:00
parent ddf8981d91
commit 45dcfc2e76
9 changed files with 616 additions and 1 deletions
--- a/sgl-kernel/csrc/torch_extension.cc
+++ b/sgl-kernel/csrc/torch_extension.cc
@@ -138,6 +138,11 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
      "token_expert_indices, Tensor gating_output) -> ()");
  m.impl("topk_softmax", torch::kCUDA, &topk_softmax);

+  m.def(
+      "moe_fused_gate(Tensor input, Tensor bias, int num_expert_group, int topk_group, int topk) -> "
+      "(Tensor[])");
+  m.impl("moe_fused_gate", torch::kCUDA, &moe_fused_gate);
+
  /*
   * From csrc/speculative
   */