Sgl kernel fused_moe_gate support n_shared_experts (#5440)

This commit is contained in:
Xiaoyu Zhang
2025-04-18 14:05:15 +08:00
committed by GitHub
parent 53dcf38876
commit 8e09b37077
5 changed files with 140 additions and 38 deletions

View File

@@ -200,8 +200,14 @@ void topk_softmax(
torch::Tensor& token_expert_indices,
torch::Tensor& gating_output);
// NOTE(review): this is the pre-change 5-argument declaration of moe_fused_gate
// (the removed side of this commit's diff, rendered without the '-' marker);
// the commit replaces it with the extended 7-argument form declared below.
std::vector<at::Tensor>
moe_fused_gate(at::Tensor& input, at::Tensor& bias, int64_t num_expert_group, int64_t topk_group, int64_t topk);
// Fused MoE gating kernel — declaration only; the definition lives in the CUDA
// kernel sources, so parameter semantics below are inferred from names and the
// commit message ("support n_shared_experts") and should be confirmed there.
//
// @param input   gating logits tensor — shape/dtype not visible here, TODO confirm
// @param bias    per-expert gating bias — presumably added before top-k selection; verify in kernel
// @param num_expert_group  number of expert groups for group-limited routing — assumption from name
// @param topk_group        number of groups kept during group-limited top-k — assumption from name
// @param topk              number of experts selected per token — assumption from name
// @param n_share_experts_fusion  new in this commit: count of shared experts fused
//                                into the gate output — NOTE(review): confirm against kernel
// @param routed_scaling_factor   new in this commit: scaling applied to routed
//                                expert weights — NOTE(review): confirm semantics
// @return vector of tensors — presumably {topk_weights, topk_ids}; verify against callers
std::vector<at::Tensor> moe_fused_gate(
    at::Tensor& input,
    at::Tensor& bias,
    int64_t num_expert_group,
    int64_t topk_group,
    int64_t topk,
    int64_t n_share_experts_fusion,
    double routed_scaling_factor);
/*
* From csrc/speculative