From 59d0bf012f461b7c0040f70f86f11aabbb8ea84a Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Tue, 29 Jul 2025 13:51:38 +0800
Subject: [PATCH] Tiny add warnings for DeepEP when it is suboptimal (#8426)

---
 .../srt/layers/moe/ep_moe/token_dispatcher.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
index b1aee3a93..c8cdfaa26 100644
--- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
+++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
@@ -157,6 +157,20 @@ class DeepEPBuffer:
         else:
             raise NotImplementedError
 
+        total_num_sms = torch.cuda.get_device_properties(
+            device="cuda"
+        ).multi_processor_count
+        if (
+            (deepep_mode != DeepEPMode.low_latency)
+            and not global_server_args_dict["enable_two_batch_overlap"]
+            and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2)
+        ):
+            logger.warning(
+                f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. "
+                f"This may result in highly suboptimal performance. "
+                f"Consider using --deepep-config to change the behavior."
+            )
+
         cls._buffer = Buffer(
             group,
             num_nvl_bytes,