[Feature] Hybrid EP and TP (#8590)

This commit is contained in:
Cheng Wan
2025-07-31 02:53:25 -07:00
committed by GitHub
parent 51c38163c1
commit 7a1f7fc504
14 changed files with 142 additions and 39 deletions

View File

@@ -200,15 +200,18 @@ class Scheduler(
port_args: PortArgs,
gpu_id: int,
tp_rank: int,
moe_ep_rank: int,
pp_rank: int,
dp_rank: Optional[int],
):
# Parse args
self.server_args = server_args
self.tp_rank = tp_rank
self.moe_ep_rank = moe_ep_rank
self.pp_rank = pp_rank
self.dp_rank = dp_rank
self.tp_size = server_args.tp_size
self.moe_ep_size = server_args.ep_size
self.pp_size = server_args.pp_size
self.dp_size = server_args.dp_size
self.schedule_policy = server_args.schedule_policy
@@ -310,6 +313,7 @@ class Scheduler(
server_args=server_args,
gpu_id=gpu_id,
tp_rank=tp_rank,
moe_ep_rank=moe_ep_rank,
pp_rank=pp_rank,
dp_rank=dp_rank,
nccl_port=port_args.nccl_port,
@@ -322,6 +326,7 @@ class Scheduler(
self.draft_worker = EAGLEWorker(
gpu_id=gpu_id,
tp_rank=tp_rank,
moe_ep_rank=moe_ep_rank,
server_args=server_args,
nccl_port=port_args.nccl_port,
target_worker=self.tp_worker,
@@ -2358,6 +2363,7 @@ def run_scheduler_process(
port_args: PortArgs,
gpu_id: int,
tp_rank: int,
moe_ep_rank: int,
pp_rank: int,
dp_rank: Optional[int],
pipe_writer,
@@ -2368,6 +2374,8 @@ def run_scheduler_process(
prefix += f" DP{dp_rank}"
if server_args.tp_size > 1:
prefix += f" TP{tp_rank}"
if server_args.ep_size > 1:
prefix += f" EP{moe_ep_rank}"
if server_args.pp_size > 1:
prefix += f" PP{pp_rank}"
@@ -2391,7 +2399,9 @@ def run_scheduler_process(
# Create a scheduler and run the event loop
try:
-scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, pp_rank, dp_rank)
+scheduler = Scheduler(
+server_args, port_args, gpu_id, tp_rank, moe_ep_rank, pp_rank, dp_rank
+)
pipe_writer.send(
{
"status": "ready",