Tiny add warnings for DeepEP when it is suboptimal (#8426)
This commit is contained in:
@@ -157,6 +157,20 @@ class DeepEPBuffer:
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
total_num_sms = torch.cuda.get_device_properties(
|
||||||
|
device="cuda"
|
||||||
|
).multi_processor_count
|
||||||
|
if (
|
||||||
|
(deepep_mode != DeepEPMode.low_latency)
|
||||||
|
and not global_server_args_dict["enable_two_batch_overlap"]
|
||||||
|
and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2)
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. "
|
||||||
|
f"This may result in highly suboptimal performance. "
|
||||||
|
f"Consider using --deepep-config to change the behavior."
|
||||||
|
)
|
||||||
|
|
||||||
cls._buffer = Buffer(
|
cls._buffer = Buffer(
|
||||||
group,
|
group,
|
||||||
num_nvl_bytes,
|
num_nvl_bytes,
|
||||||
|
|||||||
Reference in New Issue
Block a user