Revert "[1/2] sgl-kernel: Fuse routed scaling factor into select_experts" (#8706)

2025-08-02 20:14:30 +08:00
parent ac6962ccd6
commit f9f0138f80
5 changed files with 12 additions and 38 deletions
--- a/sgl-kernel/tests/test_moe_fused_gate.py
+++ b/sgl-kernel/tests/test_moe_fused_gate.py
@@ -19,10 +19,7 @@ from sglang.srt.layers.moe.topk import biased_grouped_topk
    ],
 )
@pytest.mark.parametrize("num_fused_shared_experts", [0, 1, 2])
-@pytest.mark.parametrize("apply_routed_scaling_factor_on_output", [True, False])
-def test_moe_fused_gate_combined(
-    seq_length, params, num_fused_shared_experts, apply_routed_scaling_factor_on_output
-):
+def test_moe_fused_gate_combined(seq_length, params, num_fused_shared_experts):
    num_experts, num_expert_group, topk_group, topk = params
    dtype = torch.float32

@@ -40,7 +37,6 @@ def test_moe_fused_gate_combined(
        topk=topk,
        num_fused_shared_experts=num_fused_shared_experts,
        routed_scaling_factor=2.5,
-        apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
    )
    ref_output, ref_indices = biased_grouped_topk(
        scores,
@@ -52,7 +48,6 @@ def test_moe_fused_gate_combined(
        topk_group=topk_group,
        num_fused_shared_experts=num_fused_shared_experts,
        routed_scaling_factor=2.5,
-        apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
    )

    # When num_fused_shared_experts > 0, ignore the comparison of the last topk dimension