[Bug] Add FlashInfer bool check for FusedMoE in Qwen MoE models (#7723)

This commit is contained in:
yilian49
2025-07-03 11:32:11 -07:00
committed by GitHub
parent 0099172327
commit c01a1df588
2 changed files with 18 additions and 0 deletions

View File

@@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
renormalize=config.norm_topk_prob,
quant_config=quant_config,
prefix=add_prefix("experts", prefix),
# Additional args for FusedMoE
**(
dict(
enable_flashinfer_moe=True,
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
)
if global_server_args_dict["enable_flashinfer_moe"]
else {}
),
)
self.gate = ReplicatedLinear(

View File

@@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
if global_server_args_dict["enable_deepep_moe"]
else {}
),
# Additional args for FusedMoE
**(
dict(
enable_flashinfer_moe=True,
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
)
if global_server_args_dict["enable_flashinfer_moe"]
else {}
),
)
self.gate = ReplicatedLinear(