Tiny add warnings for DeepEP when it is suboptimal (#8426)
This commit is contained in:
@@ -157,6 +157,20 @@ class DeepEPBuffer:
         else:
             raise NotImplementedError
 
+        total_num_sms = torch.cuda.get_device_properties(
+            device="cuda"
+        ).multi_processor_count
+        if (
+            (deepep_mode != DeepEPMode.low_latency)
+            and not global_server_args_dict["enable_two_batch_overlap"]
+            and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2)
+        ):
+            logger.warning(
+                f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. "
+                f"This may result in highly suboptimal performance. "
+                f"Consider using --deepep-config to change the behavior."
+            )
+
         cls._buffer = Buffer(
             group,
             num_nvl_bytes,
Reference in New Issue
Block a user