[Fix] Correct the base_gpu_id (DP offset) calculation in data_parallel_controller.py to account for pp_size (#10741)
This commit is contained in:
@@ -209,7 +209,9 @@ class DataParallelController:
|
|||||||
args=(server_args, tmp_port_args, base_gpu_id, dp_rank, ready_event),
|
args=(server_args, tmp_port_args, base_gpu_id, dp_rank, ready_event),
|
||||||
)
|
)
|
||||||
threads.append(thread)
|
threads.append(thread)
|
||||||
base_gpu_id += server_args.tp_size * server_args.gpu_id_step
|
base_gpu_id += (
|
||||||
|
server_args.tp_size * server_args.pp_size * server_args.gpu_id_step
|
||||||
|
)
|
||||||
|
|
||||||
# Free all sockets before starting the threads to launch TP workers
|
# Free all sockets before starting the threads to launch TP workers
|
||||||
for sock in sockets:
|
for sock in sockets:
|
||||||
|
|||||||
Reference in New Issue
Block a user