simplify the control logic for using shared experts fusion (#5504)

This commit is contained in:
Xiaoyu Zhang
2025-04-20 04:17:35 +08:00
committed by GitHub
parent bf86c5e990
commit d58e354472
16 changed files with 69 additions and 54 deletions

View File

@@ -136,6 +136,7 @@ class EPMoE(torch.nn.Module):
correction_bias: Optional[torch.Tensor] = None,
custom_routing_function: Optional[Callable] = None,
activation: str = "silu",
routed_scaling_factor: Optional[float] = None,
):
super().__init__()
@@ -164,6 +165,7 @@ class EPMoE(torch.nn.Module):
self.correction_bias = correction_bias
self.custom_routing_function = custom_routing_function
self.activation = activation
self.routed_scaling_factor = routed_scaling_factor
if quant_config is None:
self.quant_method: Optional[QuantizeMethodBase] = UnquantizedEPMoEMethod()
@@ -215,6 +217,7 @@ class EPMoE(torch.nn.Module):
num_expert_group=self.num_expert_group,
correction_bias=self.correction_bias,
custom_routing_function=self.custom_routing_function,
routed_scaling_factor=self.routed_scaling_factor,
)
reorder_topk_ids, src2dst, seg_indptr = run_moe_ep_preproess(