Add warnings for DeepEP when its configuration is suboptimal (#8426)

This commit is contained in:
fzyzcjy
2025-07-29 13:51:38 +08:00
committed by GitHub
parent 7df2c0c2db
commit 59d0bf012f

View File

@@ -157,6 +157,20 @@ class DeepEPBuffer:
else:
raise NotImplementedError
total_num_sms = torch.cuda.get_device_properties(
device="cuda"
).multi_processor_count
if (
(deepep_mode != DeepEPMode.low_latency)
and not global_server_args_dict["enable_two_batch_overlap"]
and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2)
):
logger.warning(
f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. "
f"This may result in highly suboptimal performance. "
f"Consider using --deepep-config to change the behavior."
)
cls._buffer = Buffer(
group,
num_nvl_bytes,