[6/N] MoE Refactor: Cleanup MoE-related configs (#8849)

Author: Cheng Wan
Date: 2025-08-14 21:14:53 -07:00
Committed by: GitHub
Parent: 584e1ab2d0
Commit: 295895120d
69 changed files with 956 additions and 1037 deletions

View File

@@ -5,7 +5,7 @@ import torch
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
-from sglang.srt.layers.moe.topk import select_experts
+from sglang.srt.layers.moe.topk import TopKConfig, select_experts
 from sglang.test.test_utils import CustomTestCase
@@ -175,10 +175,13 @@ class TestW8A8BlockINT8FusedMoE(CustomTestCase):
         topk_output = select_experts(
             hidden_states=a,
             router_logits=score,
-            top_k=topk,
+            topk_config=TopKConfig(top_k=topk, renormalize=False),
         )
         with torch.inference_mode():
+            ref_out = torch_w8a8_block_int8_moe(
+                a, w1, w2, w1_s, w2_s, score, topk, block_size
+            )
             out = fused_moe(
                 a,
                 w1,
@@ -189,9 +192,6 @@ class TestW8A8BlockINT8FusedMoE(CustomTestCase):
                 w2_scale=w2_s,
                 block_shape=block_size,
             )
-            ref_out = torch_w8a8_block_int8_moe(
-                a, w1, w2, w1_s, w2_s, score, topk, block_size
-            )
             self.assertTrue(
                 torch.mean(torch.abs(out.to(torch.float32) - ref_out.to(torch.float32)))
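
Note on the change above: the loose top_k / renormalize keyword arguments of select_experts are replaced by a single topk_config object. The diff only shows that TopKConfig is imported from sglang.srt.layers.moe.topk and accepts top_k and renormalize in its constructor; as a rough mental model it behaves like a small config dataclass. A hypothetical sketch (not the actual class, which likely defines further routing options):

from dataclasses import dataclass

# Hypothetical stand-in for sglang.srt.layers.moe.topk.TopKConfig, limited to the
# two fields visible in this diff; the real class may expose more options.
@dataclass
class TopKConfigSketch:
    top_k: int
    renormalize: bool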

View File

@@ -5,7 +5,7 @@ import torch
 from sglang.srt.layers.activation import SiluAndMul
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
-from sglang.srt.layers.moe.topk import select_experts
+from sglang.srt.layers.moe.topk import TopKConfig, select_experts
 from sglang.srt.layers.quantization.int8_kernel import per_token_quant_int8
 from sglang.test.test_utils import CustomTestCase
@@ -118,7 +118,7 @@ class TestW8A8Int8FusedMoE(CustomTestCase):
         topk_output = select_experts(
             hidden_states=a,
             router_logits=score,
-            top_k=topk,
+            topk_config=TopKConfig(top_k=topk, renormalize=False),
         )
         out = fused_moe(
             a,
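
Across both test files the migration is mechanical: build a TopKConfig and pass it as topk_config instead of separate keyword arguments, then hand the resulting topk_output to fused_moe as before. A minimal sketch of the new call pattern, assuming an environment where sglang's MoE kernels are importable and runnable (typically CUDA); the tensor shapes, dtype, and top-k value below are illustrative, not taken from the diff:

import torch

from sglang.srt.layers.moe.topk import TopKConfig, select_experts

# Illustrative inputs: 16 tokens, hidden size 128, router logits over 8 experts.
hidden_states = torch.randn(16, 128, dtype=torch.float16, device="cuda")
router_logits = torch.randn(16, 8, dtype=torch.float16, device="cuda")

# Before this commit the call passed top_k (and related flags) as direct keyword
# arguments; after the refactor they are bundled into a TopKConfig.
topk_output = select_experts(
    hidden_states=hidden_states,
    router_logits=router_logits,
    topk_config=TopKConfig(top_k=2, renormalize=False),
)

# topk_output feeds fused_moe exactly as in the tests above; only the way the
# routing configuration is constructed changes.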