[Bug] add flashinfer bool check for fusedmoe in Qwen moe models (#7723)
This commit is contained in:
@@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
             renormalize=config.norm_topk_prob,
             quant_config=quant_config,
             prefix=add_prefix("experts", prefix),
+            # Additional args for FusedMoE
+            **(
+                dict(
+                    enable_flashinfer_moe=True,
+                    enable_ep_moe=global_server_args_dict["enable_ep_moe"],
+                )
+                if global_server_args_dict["enable_flashinfer_moe"]
+                else {}
+            ),
         )
 
         self.gate = ReplicatedLinear(
@@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
                 if global_server_args_dict["enable_deepep_moe"]
                 else {}
             ),
+            # Additional args for FusedMoE
+            **(
+                dict(
+                    enable_flashinfer_moe=True,
+                    enable_ep_moe=global_server_args_dict["enable_ep_moe"],
+                )
+                if global_server_args_dict["enable_flashinfer_moe"]
+                else {}
+            ),
         )
 
         self.gate = ReplicatedLinear(
Reference in New Issue
Block a user