diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py index 1f27b3393..f273c55cb 100644 --- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py +++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py @@ -84,9 +84,7 @@ class DeepEPBuffer: num_nvl_bytes, num_rdma_bytes, low_latency_mode=deepep_mode.enable_low_latency(), - num_qps_per_rank=( - num_experts // group.size() if deepep_mode.enable_low_latency() else 1 - ), + num_qps_per_rank=(max(num_experts // group.size(), Buffer.num_sms // 2)), ) return cls._buffer