From 934ed88691b49d7a5f3bed452c09cdec00274712 Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Tue, 10 Feb 2026 18:30:48 +0800
Subject: [PATCH] add qwen3_moe

---
 .../model_executor/layers/fused_moe/layer.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py b/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py
index b4389d3..a540770 100644
--- a/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm-v0.6.2/vllm/model_executor/layers/fused_moe/layer.py
@@ -153,23 +153,25 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
 
     def forward_mlu(
         self,
+        layer: torch.nn.Module,
         x: torch.Tensor,
-        w1: torch.Tensor,
-        w2: torch.Tensor,
         router_logits: torch.Tensor,
         top_k: int,
         renormalize: bool,
         use_grouped_topk: bool,
-        num_expert_group: Optional[int],
-        topk_group: Optional[int],
+        topk_group: Optional[int] = None,
+        num_expert_group: Optional[int] = None,
+        custom_routing_function: Optional[Callable] = None,
     ) -> torch.Tensor:
         from vllm._mlu_ops import fused_moe
-        assert use_grouped_topk is False and num_expert_group is None and topk_group is None, \
-            f"Following params: use_grouped_topk, num_expert_group, topk_group are not support yet."
+        assert use_grouped_topk is False and num_expert_group is None \
+            and topk_group is None, \
+            "Following params: use_grouped_topk, num_expert_group, " \
+            "topk_group are not supported yet."
         return fused_moe(x, router_logits,
-                         w1, w2,
+                         layer.w13_weight, layer.w2_weight,
                          None, None,  # bias1, bias2
                          None,  # residual
                          None,  # input_smooth