[optimize] fuse renormalize into moe_topk_softmax (#7744)

Co-authored-by: ispobock <ispobaoke@gmail.com>
2025-07-04 03:42:44 +08:00
parent 6840a7bbb2
commit 2998c4bdf4
7 changed files with 254 additions and 101 deletions
--- a/sgl-kernel/benchmark/bench_moe_topk_softmax.py
+++ b/sgl-kernel/benchmark/bench_moe_topk_softmax.py
@@ -34,14 +34,10 @@ def sglang_topk_softmax(gating_output, topk):
    topk_indices = torch.empty(
        (num_tokens, topk), dtype=torch.int32, device=gating_output.device
    )
-    token_expert_indices = torch.empty(
-        (num_tokens, topk), dtype=torch.int32, device=gating_output.device
-    )

    topk_softmax(
        topk_weights=topk_weights,
        topk_ids=topk_indices,
-        token_expert_indices=token_expert_indices,
        gating_output=gating_output,
    )