[Feature] Comprehensive Hybrid Parallelism Support (#6389)
@@ -387,7 +387,6 @@ class ServerArgs:
         ), "Please enable dp attention when setting enable_dp_attention. "
 
         # DeepEP MoE
-        self.enable_sp_layernorm = False
         if self.enable_deepep_moe:
             if self.deepep_mode == "auto":
                 assert (
@@ -397,9 +396,6 @@ class ServerArgs:
                 logger.warning("Cuda graph is disabled because deepep_mode=`normal`")
                 self.disable_cuda_graph = True
             self.ep_size = self.tp_size
-            self.enable_sp_layernorm = (
-                self.dp_size < self.tp_size if self.enable_dp_attention else True
-            )
             logger.warning(
                 f"DeepEP MoE is enabled. The expert parallel size is adjusted to be the same as the tensor parallel size[{self.tp_size}]."
             )
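Both hunks remove the automatic `enable_sp_layernorm` configuration from `ServerArgs`. For reference, the removed rule can be read as a small predicate: with DeepEP MoE enabled, SP layernorm is turned on unless DP attention is active and already spans the full TP group. Below is a minimal standalone sketch of that rule; `Args` and `sp_layernorm_enabled` are hypothetical names for illustration, not part of the SGLang codebase.

from dataclasses import dataclass


@dataclass
class Args:
    # Hypothetical stand-in for the relevant ServerArgs fields.
    tp_size: int
    dp_size: int
    enable_dp_attention: bool
    enable_deepep_moe: bool


def sp_layernorm_enabled(args: Args) -> bool:
    # Mirrors the removed logic: default False; with DeepEP MoE on,
    # enable SP layernorm unless DP attention already covers the
    # whole TP group (dp_size == tp_size).
    if not args.enable_deepep_moe:
        return False
    if args.enable_dp_attention:
        return args.dp_size < args.tp_size
    return True


# Example: tp_size=8 with DP attention over 4 ranks -> SP layernorm on.
print(sp_layernorm_enabled(Args(tp_size=8, dp_size=4,
                                enable_dp_attention=True,
                                enable_deepep_moe=True)))  # True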