From c01a1df5888c63a010a26ddb297e052d96da7ad7 Mon Sep 17 00:00:00 2001 From: yilian49 <43861414+yilian49@users.noreply.github.com> Date: Thu, 3 Jul 2025 11:32:11 -0700 Subject: [PATCH] [Bug] add flashinfer bool check for fusedmoe in Qwen moe models (#7723) --- python/sglang/srt/models/qwen2_moe.py | 9 +++++++++ python/sglang/srt/models/qwen3_moe.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/python/sglang/srt/models/qwen2_moe.py b/python/sglang/srt/models/qwen2_moe.py index 95f0fcb70..001b3a336 100644 --- a/python/sglang/srt/models/qwen2_moe.py +++ b/python/sglang/srt/models/qwen2_moe.py @@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module): renormalize=config.norm_topk_prob, quant_config=quant_config, prefix=add_prefix("experts", prefix), + # Additional args for FusedMoE + **( + dict( + enable_flashinfer_moe=True, + enable_ep_moe=global_server_args_dict["enable_ep_moe"], + ) + if global_server_args_dict["enable_flashinfer_moe"] + else {} + ), ) self.gate = ReplicatedLinear( diff --git a/python/sglang/srt/models/qwen3_moe.py b/python/sglang/srt/models/qwen3_moe.py index 5a2844438..9d5ce6103 100644 --- a/python/sglang/srt/models/qwen3_moe.py +++ b/python/sglang/srt/models/qwen3_moe.py @@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module): if global_server_args_dict["enable_deepep_moe"] else {} ), + # Additional args for FusedMoE + **( + dict( + enable_flashinfer_moe=True, + enable_ep_moe=global_server_args_dict["enable_ep_moe"], + ) + if global_server_args_dict["enable_flashinfer_moe"] + else {} + ), ) self.gate = ReplicatedLinear(