diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py index b1aee3a93..c8cdfaa26 100644 --- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py +++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py @@ -157,6 +157,20 @@ class DeepEPBuffer: else: raise NotImplementedError + total_num_sms = torch.cuda.get_device_properties( + device="cuda" + ).multi_processor_count + if ( + (deepep_mode != DeepEPMode.low_latency) + and not global_server_args_dict["enable_two_batch_overlap"] + and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2) + ): + logger.warning( + f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. " + f"This may result in highly suboptimal performance. " + f"Consider using --deepep-config to change the behavior." + ) + cls._buffer = Buffer( group, num_nvl_bytes,