[1/N] MoE Refactor: refactor select_experts (#7966)

2025-07-19 00:51:15 -07:00
parent cfab0ff6e2
commit 15ad6c9086
39 changed files with 556 additions and 871 deletions
--- a/test/srt/test_block_int8.py
+++ b/test/srt/test_block_int8.py
@@ -5,6 +5,7 @@ import torch

 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
+from sglang.srt.layers.moe.topk import select_experts
 from sglang.test.test_utils import CustomTestCase


@@ -171,14 +172,18 @@ class TestW8A8BlockINT8FusedMoE(CustomTestCase):

        score = torch.randn((M, E), dtype=dtype)

+        topk_output = select_experts(
+            hidden_states=a,
+            router_logits=score,
+            top_k=topk,
+        )
+
        with torch.inference_mode():
            out = fused_moe(
                a,
                w1,
                w2,
-                score,
-                topk,
-                renormalize=False,
+                topk_output,
                use_int8_w8a8=True,
                w1_scale=w1_s,
                w2_scale=w2_s,