[6/N] MoE Refactor: Cleanup MoE-related configs (#8849)

2025-08-14 21:14:53 -07:00
parent 584e1ab2d0
commit 295895120d
69 changed files with 956 additions and 1037 deletions
--- a/test/srt/test_fused_moe.py
+++ b/test/srt/test_fused_moe.py
@@ -6,7 +6,7 @@ from tqdm import tqdm

 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
-from sglang.srt.layers.moe.topk import select_experts
+from sglang.srt.layers.moe.topk import TopKConfig, select_experts
 from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz
 from sglang.srt.layers.quantization.fp8_utils import normalize_e4m3fn_to_e4m3fnuz
 from sglang.srt.utils import is_hip
@@ -136,19 +136,7 @@ class TestFusedMOE(CustomTestCase):
            topk_output = select_experts(
                hidden_states=a,
                router_logits=score,
-                top_k=topk,
-            )
-
-            sglang_output = fused_moe(
-                a,
-                w1,
-                w2,
-                topk_output,
-                use_fp8_w8a8=True,
-                w1_scale=w1_scale,
-                w2_scale=w2_scale,
-                a1_scale=a1_scale,
-                a2_scale=a2_scale,
+                topk_config=TopKConfig(top_k=topk, renormalize=False),
            )

            torch_output = self.torch_naive_moe(
@@ -162,6 +150,18 @@ class TestFusedMOE(CustomTestCase):
                a1_scale,
                a2_scale,
            )
+
+            sglang_output = fused_moe(
+                a,
+                w1,
+                w2,
+                topk_output,
+                use_fp8_w8a8=True,
+                w1_scale=w1_scale,
+                w2_scale=w2_scale,
+                a1_scale=a1_scale,
+                a2_scale=a2_scale,
+            )
            torch.testing.assert_close(
                sglang_output, torch_output, rtol=rtol, atol=atol
            )
@@ -174,7 +174,7 @@ class TestFusedMOE(CustomTestCase):
            topk_output = select_experts(
                hidden_states=a,
                router_logits=score,
-                top_k=topk,
+                topk_config=TopKConfig(top_k=topk, renormalize=False),
            )

            triton_output = fused_moe(a, w1, w2, topk_output)