[Fix] Fix the bug of the calculation of base_gpu_id (dp offset) in data_parallel_controller.py (#10741)
This commit is contained in:
@@ -209,7 +209,9 @@ class DataParallelController:
|
||||
args=(server_args, tmp_port_args, base_gpu_id, dp_rank, ready_event),
|
||||
)
|
||||
threads.append(thread)
|
||||
- base_gpu_id += server_args.tp_size * server_args.gpu_id_step
|
||||
+ base_gpu_id += (
|
||||
+     server_args.tp_size * server_args.pp_size * server_args.gpu_id_step
|
||||
+ )
|
||||
|
||||
# Free all sockets before starting the threads to launch TP workers
|
||||
for sock in sockets:
|
||||
|
||||
Reference in New Issue
Block a user