Revert "[1/2][resubmit] sgl-kernel: Fuse routed scaling factor into m… (#9035)

This commit is contained in:
Yineng Zhang
2025-08-10 17:34:54 -07:00
committed by GitHub
parent f2887498f0
commit dd949ace23
6 changed files with 12 additions and 62 deletions

View File

@@ -19,10 +19,7 @@ from sglang.srt.layers.moe.topk import biased_grouped_topk
     ],
 )
 @pytest.mark.parametrize("num_fused_shared_experts", [0, 1, 2])
-@pytest.mark.parametrize("apply_routed_scaling_factor_on_output", [True, False])
-def test_moe_fused_gate_combined(
-    seq_length, params, num_fused_shared_experts, apply_routed_scaling_factor_on_output
-):
+def test_moe_fused_gate_combined(seq_length, params, num_fused_shared_experts):
     num_experts, num_expert_group, topk_group, topk = params
     dtype = torch.float32
@@ -40,7 +37,6 @@ def test_moe_fused_gate_combined(
         topk=topk,
         num_fused_shared_experts=num_fused_shared_experts,
         routed_scaling_factor=2.5,
-        apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
     )
     ref_output, ref_indices = biased_grouped_topk(
         scores,
@@ -52,7 +48,6 @@ def test_moe_fused_gate_combined(
         topk_group=topk_group,
         num_fused_shared_experts=num_fused_shared_experts,
         routed_scaling_factor=2.5,
-        apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
     )
     # When num_fused_shared_experts > 0, ignore the comparison of the last topk dimension