[PP] Add pipeline parallelism (#5724)
This commit is contained in:
@@ -154,6 +154,8 @@ def load_model(server_args, port_args, tp_rank):
|
||||
gpu_id=tp_rank,
|
||||
tp_rank=tp_rank,
|
||||
tp_size=server_args.tp_size,
|
||||
pp_rank=0,
|
||||
pp_size=1,
|
||||
nccl_port=port_args.nccl_port,
|
||||
server_args=server_args,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user