[Bug] add flashinfer bool check for fusedmoe in Qwen moe models (#7723)
This commit is contained in:
@@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
|
||||
renormalize=config.norm_topk_prob,
|
||||
quant_config=quant_config,
|
||||
prefix=add_prefix("experts", prefix),
|
||||
# Additional args for FusedMoE
|
||||
**(
|
||||
dict(
|
||||
enable_flashinfer_moe=True,
|
||||
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
|
||||
)
|
||||
if global_server_args_dict["enable_flashinfer_moe"]
|
||||
else {}
|
||||
),
|
||||
)
|
||||
|
||||
self.gate = ReplicatedLinear(
|
||||
|
||||
@@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
|
||||
if global_server_args_dict["enable_deepep_moe"]
|
||||
else {}
|
||||
),
|
||||
# Additional args for FusedMoE
|
||||
**(
|
||||
dict(
|
||||
enable_flashinfer_moe=True,
|
||||
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
|
||||
)
|
||||
if global_server_args_dict["enable_flashinfer_moe"]
|
||||
else {}
|
||||
),
|
||||
)
|
||||
|
||||
self.gate = ReplicatedLinear(
|
||||
|
||||
Reference in New Issue
Block a user