diff --git a/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py b/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py
index b4389d3..a540770 100644
--- a/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py
@@ -153,23 +153,25 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
     def forward_mlu(
         self,
+        layer: torch.nn.Module,
         x: torch.Tensor,
-        w1: torch.Tensor,
-        w2: torch.Tensor,
         router_logits: torch.Tensor,
         top_k: int,
         renormalize: bool,
         use_grouped_topk: bool,
-        num_expert_group: Optional[int],
-        topk_group: Optional[int],
+        topk_group: Optional[int] = None,
+        num_expert_group: Optional[int] = None,
+        custom_routing_function: Optional[Callable] = None,
     ) -> torch.Tensor:
         from vllm._mlu_ops import fused_moe
-        assert use_grouped_topk is False and num_expert_group is None and topk_group is None, \
-            f"Following params: use_grouped_topk, num_expert_group, topk_group are not support yet."
+        assert use_grouped_topk is False and num_expert_group is None \
+            and topk_group is None, \
+            "Following params: use_grouped_topk, num_expert_group, " \
+            "topk_group are not supported yet."
         return fused_moe(x, router_logits,
-                         w1, w2,
+                         layer.w13_weight, layer.w2_weight,
                          None, None,  # bias1, bias2
                          None,  # residual
                          None,  # input_smooth
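
For context on what the reworked call computes, below is a minimal pure-PyTorch sketch. It is not the MLU kernel and not part of this patch; it assumes vLLM's standard MoE weight layout, which the new layer-based call relies on: layer.w13_weight packs the gate and up projections as [num_experts, 2 * intermediate, hidden], layer.w2_weight holds the down projection as [num_experts, hidden, intermediate], routing is softmax-then-top-k with optional renormalization, and the activation is SiLU gating (SwiGLU). The helper name reference_fused_moe is hypothetical.

# A readable, unfused equivalent of the fused_moe call in the patch.
# Assumptions (see lead-in): w13 packs gate/up projections, SiLU
# gating, softmax-then-top-k routing. Not the MLU kernel.
import torch
import torch.nn.functional as F

def reference_fused_moe(x: torch.Tensor,              # [T, H] token activations
                        router_logits: torch.Tensor,  # [T, E] router scores
                        w13: torch.Tensor,            # [E, 2*I, H] gate+up packed
                        w2: torch.Tensor,             # [E, H, I] down projection
                        top_k: int,
                        renormalize: bool) -> torch.Tensor:
    routing = F.softmax(router_logits, dim=-1)
    topk_w, topk_ids = torch.topk(routing, top_k, dim=-1)  # [T, K]
    if renormalize:
        topk_w = topk_w / topk_w.sum(dim=-1, keepdim=True)
    out = torch.zeros_like(x)
    inter = w13.shape[1] // 2
    for e in range(w13.shape[0]):
        # Find (token, slot) pairs routed to expert e.
        token_idx, slot = (topk_ids == e).nonzero(as_tuple=True)
        if token_idx.numel() == 0:
            continue  # no tokens routed to this expert
        h = x[token_idx] @ w13[e].t()              # [n, 2*I]
        act = F.silu(h[:, :inter]) * h[:, inter:]  # SwiGLU gating
        expert_out = act @ w2[e].t()               # [n, H]
        out.index_add_(0, token_idx,
                       expert_out * topk_w[token_idx, slot].unsqueeze(-1))
    return out

The fused kernel performs the same math in a single pass; the extra slots passed as None in the patch (bias1, bias2, residual, input_smooth) appear to be MLU-specific extensions with no counterpart in this sketch.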