[Feature] Hybrid EP and TP (#8590)

This commit is contained in:
Cheng Wan
2025-07-31 02:53:25 -07:00
committed by GitHub
parent 51c38163c1
commit 7a1f7fc504
14 changed files with 142 additions and 39 deletions

View File

@@ -270,14 +270,6 @@ class ServerArgs:
sm_group_num: int = 3
def __post_init__(self):
# Expert parallelism
# We put it here first due to some internal ckpt conversion issues.
if self.enable_ep_moe:
self.ep_size = self.tp_size
logger.warning(
f"EP MoE is enabled. The expert parallel size is adjusted to be the same as the tensor parallel size[{self.tp_size}]."
)
# Set missing default values
if self.tokenizer_path is None:
self.tokenizer_path = self.model_path
@@ -1335,6 +1327,7 @@ class ServerArgs:
parser.add_argument(
"--expert-parallel-size",
"--ep-size",
"--ep",
type=int,
default=ServerArgs.ep_size,
help="The expert parallelism size.",