[1/N] MoE Refactor: refactor select_experts (#7966)

2025-07-19 00:51:15 -07:00
parent cfab0ff6e2
commit 15ad6c9086
39 changed files with 556 additions and 871 deletions
--- a/test/srt/test_triton_moe_wna16.py
+++ b/test/srt/test_triton_moe_wna16.py
@@ -5,6 +5,7 @@ import torch

 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
+from sglang.srt.layers.moe.topk import select_experts

 NUM_EXPERTS = [8, 64]
 TOP_KS = [2, 6]
@@ -219,13 +220,17 @@ def test_fused_moe_wn16(
        if has_zp:
            w_qzeros[expert_id] = qzeros

+    topk_output = select_experts(
+        hidden_states=a,
+        router_logits=score,
+        top_k=topk,
+    )
+
    triton_output = fused_moe(
        a,
        w1_qweight,
        w2_qweight,
-        score,
-        topk,
-        renormalize=False,
+        topk_output,
        use_int4_w4a16=weight_bits == 4,
        use_int8_w8a16=weight_bits == 8,
        w1_scale=w1_scales,