add qwen3_moe
This commit is contained in:
@@ -153,23 +153,25 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
|||||||
|
|
||||||
def forward_mlu(
|
def forward_mlu(
|
||||||
self,
|
self,
|
||||||
|
layer: torch.nn.Module,
|
||||||
x: torch.Tensor,
|
x: torch.Tensor,
|
||||||
w1: torch.Tensor,
|
|
||||||
w2: torch.Tensor,
|
|
||||||
router_logits: torch.Tensor,
|
router_logits: torch.Tensor,
|
||||||
top_k: int,
|
top_k: int,
|
||||||
renormalize: bool,
|
renormalize: bool,
|
||||||
use_grouped_topk: bool,
|
use_grouped_topk: bool,
|
||||||
num_expert_group: Optional[int],
|
topk_group: Optional[int] = None,
|
||||||
topk_group: Optional[int],
|
num_expert_group: Optional[int] = None,
|
||||||
|
custom_routing_function: Optional[Callable] = None,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
from vllm._mlu_ops import fused_moe
|
from vllm._mlu_ops import fused_moe
|
||||||
|
|
||||||
assert use_grouped_topk is False and num_expert_group is None and topk_group is None, \
|
assert use_grouped_topk is False and num_expert_group is None \
|
||||||
f"Following params: use_grouped_topk, num_expert_group, topk_group are not support yet."
|
and topk_group is None, \
|
||||||
|
"Following params: use_grouped_topk, num_expert_group, " \
|
||||||
|
"topk_group are not supported yet."
|
||||||
return fused_moe(x,
|
return fused_moe(x,
|
||||||
router_logits,
|
router_logits,
|
||||||
w1, w2,
|
layer.w13_weight, layer.w2_weight,
|
||||||
None, None, # bias1, bias2
|
None, None, # bias1, bias2
|
||||||
None, # residual
|
None, # residual
|
||||||
None, # input_smooth
|
None, # input_smooth
|
||||||
|
|||||||
Reference in New Issue
Block a user