Revert "[1/2] sgl-kernel: Fuse routed scaling factor into select_experts" (#8706)
@@ -19,10 +19,7 @@ from sglang.srt.layers.moe.topk import biased_grouped_topk
     ],
 )
 @pytest.mark.parametrize("num_fused_shared_experts", [0, 1, 2])
-@pytest.mark.parametrize("apply_routed_scaling_factor_on_output", [True, False])
-def test_moe_fused_gate_combined(
-    seq_length, params, num_fused_shared_experts, apply_routed_scaling_factor_on_output
-):
+def test_moe_fused_gate_combined(seq_length, params, num_fused_shared_experts):
     num_experts, num_expert_group, topk_group, topk = params
     dtype = torch.float32
 
@@ -40,7 +37,6 @@ def test_moe_fused_gate_combined(
         topk=topk,
         num_fused_shared_experts=num_fused_shared_experts,
         routed_scaling_factor=2.5,
-        apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
     )
     ref_output, ref_indices = biased_grouped_topk(
         scores,
@@ -52,7 +48,6 @@ def test_moe_fused_gate_combined(
         topk_group=topk_group,
         num_fused_shared_experts=num_fused_shared_experts,
         routed_scaling_factor=2.5,
-        apply_routed_scaling_factor_on_output=apply_routed_scaling_factor_on_output,
     )
 
     # When num_fused_shared_experts > 0, ignore the comparison of the last topk dimension
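For context only (this is not part of the revert itself): a minimal sketch of the behavioral difference the removed apply_routed_scaling_factor_on_output flag toggled, as implied by the commit title. The function names below are illustrative assumptions, not the actual sgl-kernel API; the point is only where the routed_scaling_factor multiply happens.

import torch

def select_experts_fused(topk_weights: torch.Tensor, routed_scaling_factor: float) -> torch.Tensor:
    # Fused path (what the reverted change added): the gating/select_experts
    # kernel scales the routed top-k weights before returning them, so the
    # caller does not need a separate multiply on the MoE output.
    return topk_weights * routed_scaling_factor

def select_experts_then_scale(topk_weights: torch.Tensor, routed_scaling_factor: float) -> torch.Tensor:
    # Post-revert path: the kernel returns unscaled weights, and the caller
    # applies the routed scaling factor afterwards.
    selected = topk_weights  # kernel output, unscaled
    return selected * routed_scaling_factor  # applied later by the caller

if __name__ == "__main__":
    # Both orderings are mathematically equivalent for a plain multiply,
    # which is why the test above compared the two paths for equality.
    w = torch.rand(4, 8, dtype=torch.float32)
    assert torch.allclose(
        select_experts_fused(w, 2.5), select_experts_then_scale(w, 2.5)
    )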