add qwen3_moe

This commit is contained in:
Chranos
2026-02-10 18:30:48 +08:00
parent fa0219fbf8
commit 934ed88691

View File

@@ -153,23 +153,25 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
def forward_mlu(
self,
layer: torch.nn.Module,
x: torch.Tensor,
w1: torch.Tensor,
w2: torch.Tensor,
router_logits: torch.Tensor,
top_k: int,
renormalize: bool,
use_grouped_topk: bool,
num_expert_group: Optional[int],
topk_group: Optional[int],
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
) -> torch.Tensor:
from vllm._mlu_ops import fused_moe
assert use_grouped_topk is False and num_expert_group is None and topk_group is None, \
f"Following params: use_grouped_topk, num_expert_group, topk_group are not support yet."
assert use_grouped_topk is False and num_expert_group is None \
and topk_group is None, \
"Following params: use_grouped_topk, num_expert_group, " \
"topk_group are not supported yet."
return fused_moe(x,
router_logits,
w1, w2,
layer.w13_weight, layer.w2_weight,
None, None, # bias1, bias2
None, # residual
None, # input_smooth