Fix device type handling in pipeline parallelism (PP): pass `device=self.device` to the Scheduler and TpModelWorker communication calls (#7760)

This commit is contained in:
Qiaolin Yu
2025-07-15 19:37:14 -07:00
committed by GitHub
parent 7498522f7d
commit 3bc43c683e
4 changed files with 25 additions and 27 deletions

View File

@@ -962,6 +962,7 @@ class Scheduler(
self.world_group.device_group,
self.pp_rank * self.tp_size + dp_offset,
(self.pp_rank + 1) * self.tp_size + dp_offset,
device=self.device,
)
# send out proxy tensors to the next stage
@@ -1010,6 +1011,7 @@ class Scheduler(
self.world_group.device_group,
(self.pp_rank - 1) * self.tp_size + dp_offset,
self.pp_rank * self.tp_size + dp_offset,
device=self.device,
)
else:
recv_reqs = None
@@ -1040,6 +1042,7 @@ class Scheduler(
self.attn_tp_group.rank,
self.attn_tp_cpu_group,
src=self.attn_tp_group.ranks[0],
device=self.device,
)
if self.tp_size != 1:
control_reqs = broadcast_pyobj(
@@ -1047,6 +1050,7 @@ class Scheduler(
self.tp_group.rank,
self.tp_cpu_group,
src=self.tp_group.ranks[0],
device=self.device,
)
recv_reqs = work_reqs + control_reqs
elif self.tp_size != 1:
@@ -1055,6 +1059,7 @@ class Scheduler(
self.tp_group.rank,
self.tp_cpu_group,
src=self.tp_group.ranks[0],
device=self.device,
)
return recv_reqs

View File

@@ -144,6 +144,7 @@ class TpModelWorker:
self.tp_size * self.pp_rank + tp_rank,
self.world_group.cpu_group,
src=self.world_group.ranks[0],
device=self.device,
)[0]
set_random_seed(self.random_seed)