[Feature] Hybrid EP and TP (#8590)

This commit is contained in:
Cheng Wan
2025-07-31 02:53:25 -07:00
committed by GitHub
parent 51c38163c1
commit 7a1f7fc504
14 changed files with 142 additions and 39 deletions

View File

@@ -138,6 +138,7 @@ class BenchArgs:
def load_model(server_args, port_args, tp_rank):
suppress_other_loggers()
rank_print = print if tp_rank == 0 else lambda *args, **kwargs: None
moe_ep_rank = tp_rank // (server_args.tp_size // server_args.ep_size)
model_config = ModelConfig.from_server_args(server_args)
model_runner = ModelRunner(
@@ -146,6 +147,8 @@ def load_model(server_args, port_args, tp_rank):
gpu_id=tp_rank,
tp_rank=tp_rank,
tp_size=server_args.tp_size,
moe_ep_rank=moe_ep_rank,
moe_ep_size=server_args.ep_size,
pp_rank=0,
pp_size=1,
nccl_port=port_args.nccl_port,