[Feature] Hybrid EP and TP (#8590)
This commit is contained in:
@@ -200,15 +200,18 @@ class Scheduler(
|
||||
port_args: PortArgs,
|
||||
gpu_id: int,
|
||||
tp_rank: int,
|
||||
moe_ep_rank: int,
|
||||
pp_rank: int,
|
||||
dp_rank: Optional[int],
|
||||
):
|
||||
# Parse args
|
||||
self.server_args = server_args
|
||||
self.tp_rank = tp_rank
|
||||
self.moe_ep_rank = moe_ep_rank
|
||||
self.pp_rank = pp_rank
|
||||
self.dp_rank = dp_rank
|
||||
self.tp_size = server_args.tp_size
|
||||
self.moe_ep_size = server_args.ep_size
|
||||
self.pp_size = server_args.pp_size
|
||||
self.dp_size = server_args.dp_size
|
||||
self.schedule_policy = server_args.schedule_policy
|
||||
@@ -310,6 +313,7 @@ class Scheduler(
|
||||
server_args=server_args,
|
||||
gpu_id=gpu_id,
|
||||
tp_rank=tp_rank,
|
||||
moe_ep_rank=moe_ep_rank,
|
||||
pp_rank=pp_rank,
|
||||
dp_rank=dp_rank,
|
||||
nccl_port=port_args.nccl_port,
|
||||
@@ -322,6 +326,7 @@ class Scheduler(
|
||||
self.draft_worker = EAGLEWorker(
|
||||
gpu_id=gpu_id,
|
||||
tp_rank=tp_rank,
|
||||
moe_ep_rank=moe_ep_rank,
|
||||
server_args=server_args,
|
||||
nccl_port=port_args.nccl_port,
|
||||
target_worker=self.tp_worker,
|
||||
@@ -2358,6 +2363,7 @@ def run_scheduler_process(
|
||||
port_args: PortArgs,
|
||||
gpu_id: int,
|
||||
tp_rank: int,
|
||||
moe_ep_rank: int,
|
||||
pp_rank: int,
|
||||
dp_rank: Optional[int],
|
||||
pipe_writer,
|
||||
@@ -2368,6 +2374,8 @@ def run_scheduler_process(
|
||||
prefix += f" DP{dp_rank}"
|
||||
if server_args.tp_size > 1:
|
||||
prefix += f" TP{tp_rank}"
|
||||
if server_args.ep_size > 1:
|
||||
prefix += f" EP{moe_ep_rank}"
|
||||
if server_args.pp_size > 1:
|
||||
prefix += f" PP{pp_rank}"
|
||||
|
||||
@@ -2391,7 +2399,9 @@ def run_scheduler_process(
|
||||
|
||||
# Create a scheduler and run the event loop
|
||||
try:
|
||||
scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, pp_rank, dp_rank)
|
||||
scheduler = Scheduler(
|
||||
server_args, port_args, gpu_id, tp_rank, moe_ep_rank, pp_rank, dp_rank
|
||||
)
|
||||
pipe_writer.send(
|
||||
{
|
||||
"status": "ready",
|
||||
|
||||
Reference in New Issue
Block a user