From 59d0bf012f461b7c0040f70f86f11aabbb8ea84a Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Tue, 29 Jul 2025 13:51:38 +0800
Subject: [PATCH] Tiny add warnings for DeepEP when it is suboptimal (#8426)

---
 .../srt/layers/moe/ep_moe/token_dispatcher.py      | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
index b1aee3a93..c8cdfaa26 100644
--- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
+++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py
@@ -157,6 +157,20 @@ class DeepEPBuffer:
         else:
             raise NotImplementedError
 
+        total_num_sms = torch.cuda.get_device_properties(
+            device="cuda"
+        ).multi_processor_count
+        if (
+            (deepep_mode != DeepEPMode.low_latency)
+            and not global_server_args_dict["enable_two_batch_overlap"]
+            and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2)
+        ):
+            logger.warning(
+                f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. "
+                f"This may result in highly suboptimal performance. "
+                f"Consider using --deepep-config to change the behavior."
+            )
+
         cls._buffer = Buffer(
             group,
             num_nvl_bytes,