[Fix] Fix base_gpu_id (dp offset) calculation to account for pp_size in data_parallel_controller.py (#10741)

This commit is contained in:
XSongQ
2025-10-03 13:25:57 -07:00
committed by GitHub
parent 4726c9197f
commit 546914fa2d

View File

@@ -209,7 +209,9 @@ class DataParallelController:
args=(server_args, tmp_port_args, base_gpu_id, dp_rank, ready_event),
)
threads.append(thread)
-base_gpu_id += server_args.tp_size * server_args.gpu_id_step
+base_gpu_id += (
+server_args.tp_size * server_args.pp_size * server_args.gpu_id_step
+)
# Free all sockets before starting the threads to launch TP workers
for sock in sockets: