forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3_moe
This commit is contained in:
@@ -153,23 +153,25 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
||||
|
||||
def forward_mlu(
|
||||
self,
|
||||
layer: torch.nn.Module,
|
||||
x: torch.Tensor,
|
||||
w1: torch.Tensor,
|
||||
w2: torch.Tensor,
|
||||
router_logits: torch.Tensor,
|
||||
top_k: int,
|
||||
renormalize: bool,
|
||||
use_grouped_topk: bool,
|
||||
num_expert_group: Optional[int],
|
||||
topk_group: Optional[int],
|
||||
topk_group: Optional[int] = None,
|
||||
num_expert_group: Optional[int] = None,
|
||||
custom_routing_function: Optional[Callable] = None,
|
||||
) -> torch.Tensor:
|
||||
from vllm._mlu_ops import fused_moe
|
||||
|
||||
assert use_grouped_topk is False and num_expert_group is None and topk_group is None, \
|
||||
f"Following params: use_grouped_topk, num_expert_group, topk_group are not support yet."
|
||||
assert use_grouped_topk is False and num_expert_group is None \
|
||||
and topk_group is None, \
|
||||
"Following params: use_grouped_topk, num_expert_group, " \
|
||||
"topk_group are not supported yet."
|
||||
return fused_moe(x,
|
||||
router_logits,
|
||||
w1, w2,
|
||||
layer.w13_weight, layer.w2_weight,
|
||||
None, None, # bias1, bias2
|
||||
None, # residual
|
||||
None, # input_smooth
|
||||
|
||||
Reference in New Issue
Block a user