From a564e001b532e5d00be2296ca305e67da506d0b0 Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Wed, 28 May 2025 06:12:54 +0800
Subject: [PATCH] Fix DeepEP error in Qwen 3 MoE models (#6673)

---
 .../srt/layers/moe/ep_moe/token_dispatcher.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
index 663ca1877..e89a805c1 100644
--- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
+++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
@@ -93,17 +93,20 @@ class DeepEPBuffer:
                 ),
                 num_rdma_bytes,
             )
+
+        if deepep_mode == DeepEPMode.normal:
+            num_qps_per_rank = DeepEPConfig.get_instance().num_sms // 2
+        elif deepep_mode in [DeepEPMode.low_latency, DeepEPMode.auto]:
+            num_qps_per_rank = num_experts // group.size()
+        else:
+            raise NotImplementedError
+
         cls._buffer = Buffer(
             group,
             num_nvl_bytes,
             num_rdma_bytes,
             low_latency_mode=deepep_mode.enable_low_latency(),
-            num_qps_per_rank=(
-                max(
-                    num_experts // group.size(),
-                    DeepEPConfig.get_instance().num_sms // 2,
-                )
-            ),
+            num_qps_per_rank=num_qps_per_rank,
         )
         return cls._buffer
 