Fix different device type adjustment in PP (#7760)

This commit is contained in:
Qiaolin Yu
2025-07-15 19:37:14 -07:00
committed by GitHub
parent 7498522f7d
commit 3bc43c683e
4 changed files with 25 additions and 27 deletions

View File

@@ -699,14 +699,14 @@ class GroupCoordinator:
         )
         # Serialize object to tensor and get the size as well
-        object_tensor = torch.frombuffer(pickle.dumps(obj), dtype=torch.uint8).cuda(
-            device=torch.cuda.current_device()
+        object_tensor = torch.frombuffer(pickle.dumps(obj), dtype=torch.uint8).to(
+            device=self.device
         )
         size_tensor = torch.tensor(
             [object_tensor.numel()],
             dtype=torch.long,
-            device=torch.cuda.current_device(),
+            device=self.device,
         )
         # Send object size
@@ -731,9 +731,7 @@ class GroupCoordinator:
             src != self.rank_in_group
         ), "Invalid source rank. Source rank is the same as the current rank."
-        size_tensor = torch.empty(
-            1, dtype=torch.long, device=torch.cuda.current_device()
-        )
+        size_tensor = torch.empty(1, dtype=torch.long, device=self.device)
         # Receive object size
         rank_size = torch.distributed.recv(
@@ -744,7 +742,7 @@ class GroupCoordinator:
         object_tensor = torch.empty(  # type: ignore[call-overload]
             size_tensor.item(),  # type: ignore[arg-type]
             dtype=torch.uint8,
-            device=torch.cuda.current_device(),
+            device=self.device,
         )
         rank_object = torch.distributed.recv(