[1/2][resubmit again] sgl-kernel: Fuse routed scaling factor into moe_fused_gate (#9088)

2025-08-12 20:12:38 -07:00
parent 8723b4f146
commit 13c48dcf88
4 changed files with 32 additions and 11 deletions
--- a/sgl-kernel/include/sgl_kernel_ops.h
+++ b/sgl-kernel/include/sgl_kernel_ops.h
@@ -247,7 +247,8 @@ std::vector<at::Tensor> moe_fused_gate(
    int64_t topk_group,
    int64_t topk,
    int64_t num_fused_shared_experts,
-    double routed_scaling_factor);
+    double routed_scaling_factor,
+    bool apply_routed_scaling_factor_on_output);

 void fp8_blockwise_scaled_grouped_mm(
    torch::Tensor& output,