add qwen3_moe

This commit is contained in:
Chranos
2026-02-10 18:30:48 +08:00
parent 6479429662
commit 6f6997bafb

View File

@@ -153,23 +153,25 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
def forward_mlu( def forward_mlu(
self, self,
layer: torch.nn.Module,
x: torch.Tensor, x: torch.Tensor,
w1: torch.Tensor,
w2: torch.Tensor,
router_logits: torch.Tensor, router_logits: torch.Tensor,
top_k: int, top_k: int,
renormalize: bool, renormalize: bool,
use_grouped_topk: bool, use_grouped_topk: bool,
num_expert_group: Optional[int], topk_group: Optional[int] = None,
topk_group: Optional[int], num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
) -> torch.Tensor: ) -> torch.Tensor:
from vllm._mlu_ops import fused_moe from vllm._mlu_ops import fused_moe
assert use_grouped_topk is False and num_expert_group is None and topk_group is None, \ assert use_grouped_topk is False and num_expert_group is None \
f"Following params: use_grouped_topk, num_expert_group, topk_group are not support yet." and topk_group is None, \
"Following params: use_grouped_topk, num_expert_group, " \
"topk_group are not supported yet."
return fused_moe(x, return fused_moe(x,
router_logits, router_logits,
w1, w2, layer.w13_weight, layer.w2_weight,
None, None, # bias1, bias2 None, None, # bias1, bias2
None, # residual None, # residual
None, # input_smooth None, # input_smooth