diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index 95f0fcb70..001b3a336 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module): renormalize=config.norm_topk_prob, quant_config=quant_config, prefix=add_prefix("experts", prefix), + # Additional args for FusedMoE + **( + dict( + enable_flashinfer_moe=True, + enable_ep_moe=global_server_args_dict["enable_ep_moe"], + ) + if global_server_args_dict["enable_flashinfer_moe"] + else {} + ), ) self.gate = ReplicatedLinear( diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py index 5a2844438..9d5ce6103 100644 --- a/python/sglang/srt/models/qwen3_moe.py +++ b/python/sglang/srt/models/qwen3_moe.py @@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module): if global_server_args_dict["enable_deepep_moe"] else {} ), + # Additional args for FusedMoE + **( + dict( + enable_flashinfer_moe=True, + enable_ep_moe=global_server_args_dict["enable_ep_moe"], + ) + if global_server_args_dict["enable_flashinfer_moe"] + else {} + ), ) self.gate = ReplicatedLinear(