Upgrade to vllm 0.17.0 corex v4.1 overlay

2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -413,7 +413,20 @@ class TpKVTopology:
            f"by local tensor parallel size {self.tp_size}."
        )
-        # P TP > D TP case, return the ratio as negative
+        # P TP > D TP case, return the ratio as a positive integer
-        return -remote_tp_size // self.tp_size
+        return remote_tp_size // self.tp_size
+
+    def pp_ratio(
+        self,
+        remote_pp_size: int,
+    ) -> int:
+        """
+        Return the ratio of the larger to the smaller of local and remote PP size.
+        """
+        assert self.pp_size % remote_pp_size == 0 or remote_pp_size % self.pp_size == 0, (
+            f"Local pipeline parallel size {self.pp_size} is not divisible "
+            f"by remote pipeline parallel size {remote_pp_size} or vice versa."
+        )
+        return self.pp_size // remote_pp_size if self.pp_size % remote_pp_size == 0 else remote_pp_size // self.pp_size

    def block_size_ratio(
        self,
@@ -457,6 +470,7 @@ class TpKVTopology:
    def get_target_remote_ranks(
        self,
        remote_tp_size: int,
+        remote_pp_size: int,
-    ) -> list[int]:
+    ) -> list[tuple[int, int, int]]:
        """
        Get the remote TP rank (on P) that the current local TP rank
@@ -464,19 +478,36 @@ class TpKVTopology:
        read from multiple remote ranks.
        """
        tp_ratio = self.tp_ratio(remote_tp_size)
-        if tp_ratio > 0:
-            return [self.tp_rank // tp_ratio]
+        pp_ratio = self.pp_ratio(remote_pp_size)
+        target_pp_rank_list = []
+        target_tp_rank_list = []
+        if self.pp_size < remote_pp_size:
+            for i in range(pp_ratio):
+                target_pp_rank_list.append(self.pp_rank * pp_ratio + i)
+        else:
+            target_pp_rank_list.append(self.pp_rank // pp_ratio)

-        # P TP > D TP case, D reads from |tp_ratio| remote workers.
-        tp_ratio = -tp_ratio
-        return [self.tp_rank * tp_ratio + i for i in range(tp_ratio)]
+        if self.tp_size < remote_tp_size:
+            for i in range(tp_ratio):
+                target_tp_rank_list.append(self.tp_rank * tp_ratio + i)
+        else:
+            target_tp_rank_list.append(self.tp_rank // tp_ratio)
+        # Each entry is (flat_rank, pp_rank, tp_rank); flat_rank has TP as the inner dim.
+        target_rank_list = []
+        for pp_rank in target_pp_rank_list:
+            for tp_rank in target_tp_rank_list:
+                target_rank = pp_rank * remote_tp_size + tp_rank
+                target_rank_list.append((target_rank, pp_rank, tp_rank))
+
+        return target_rank_list

    def get_target_remote_ranks_from_engine_id(
        self,
        remote_engine_id: EngineId,
-    ) -> list[int]:
+    ) -> list[tuple[int, int, int]]:
        remote_tp_size = self.remote_tp_size[remote_engine_id]
-        return self.get_target_remote_ranks(remote_tp_size)
+        remote_pp_size = self.remote_pp_size[remote_engine_id]
+        return self.get_target_remote_ranks(remote_tp_size, remote_pp_size)


 def get_current_attn_backend(vllm_config: VllmConfig):