simplify the control logic for using shared experts fusion (#5504)
This commit is contained in:
@@ -136,6 +136,7 @@ class EPMoE(torch.nn.Module):
|
||||
correction_bias: Optional[torch.Tensor] = None,
|
||||
custom_routing_function: Optional[Callable] = None,
|
||||
activation: str = "silu",
|
||||
routed_scaling_factor: Optional[float] = None,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -164,6 +165,7 @@ class EPMoE(torch.nn.Module):
|
||||
self.correction_bias = correction_bias
|
||||
self.custom_routing_function = custom_routing_function
|
||||
self.activation = activation
|
||||
self.routed_scaling_factor = routed_scaling_factor
|
||||
|
||||
if quant_config is None:
|
||||
self.quant_method: Optional[QuantizeMethodBase] = UnquantizedEPMoEMethod()
|
||||
@@ -215,6 +217,7 @@ class EPMoE(torch.nn.Module):
|
||||
num_expert_group=self.num_expert_group,
|
||||
correction_bias=self.correction_bias,
|
||||
custom_routing_function=self.custom_routing_function,
|
||||
routed_scaling_factor=self.routed_scaling_factor,
|
||||
)
|
||||
|
||||
reorder_topk_ids, src2dst, seg_indptr = run_moe_ep_preproess(
|
||||
|
||||
Reference in New Issue
Block a user