Fix unbalanced memory usage caused by broadcast using the CUDA device (#5416)

This commit is contained in:
lambert0312
2025-04-15 17:47:26 +08:00
committed by GitHub
parent d06a83fb01
commit 471650dee0
3 changed files with 35 additions and 11 deletions

View File

@@ -118,6 +118,7 @@ class VerlEngine:
rank=self._tp_rank,
dist_group=self._device_mesh_cpu.get_group(),
src=self._device_mesh_cpu.mesh[0].item(),
force_cpu_device=False,
)
return output