Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -413,7 +413,20 @@ class TpKVTopology:
f"by local tensor parallel size {self.tp_size}."
)
# P TP > D TP case, return the ratio as negative
return -remote_tp_size // self.tp_size
return remote_tp_size // self.tp_size
def pp_ratio(
    self,
    remote_pp_size: int,
) -> int:
    """
    Calculate the pipeline parallel ratio between local and remote PP.

    One of the two sizes must be an exact multiple of the other. The
    result is the multiple divided by its divisor, so it carries no sign:
    callers that need to know which side is larger must compare
    ``self.pp_size`` with ``remote_pp_size`` themselves.

    Args:
        remote_pp_size: Pipeline parallel size of the remote engine.

    Returns:
        ``self.pp_size // remote_pp_size`` when the local size is a
        multiple of the remote size, otherwise
        ``remote_pp_size // self.pp_size``.

    Raises:
        AssertionError: If neither size evenly divides the other.
    """
    local_pp_size = self.pp_size
    local_is_multiple = local_pp_size % remote_pp_size == 0
    # Report the PP sizes in the message (not the TP size) so that a
    # failure points at the values that were actually compared.
    assert local_is_multiple or remote_pp_size % local_pp_size == 0, (
        f"Local pipeline parallel size {local_pp_size} is not divisible "
        f"by remote pipeline parallel size {remote_pp_size} or vice versa."
    )
    if local_is_multiple:
        return local_pp_size // remote_pp_size
    return remote_pp_size // local_pp_size
def block_size_ratio(
self,
@@ -457,6 +470,7 @@ class TpKVTopology:
def get_target_remote_ranks(
self,
remote_tp_size: int,
remote_pp_size: int
) -> list[int]:
"""
Get the remote TP rank (on P) that the current local TP rank
@@ -464,19 +478,36 @@ class TpKVTopology:
read from multiple remote ranks.
"""
tp_ratio = self.tp_ratio(remote_tp_size)
if tp_ratio > 0:
return [self.tp_rank // tp_ratio]
pp_ratio = self.pp_ratio(remote_pp_size)
target_pp_rank_list = []
target_tp_rank_list = []
if self.pp_size < remote_pp_size:
for i in range(pp_ratio):
target_pp_rank_list.append(self.pp_rank * pp_ratio + i)
else:
target_pp_rank_list.append(self.pp_rank // pp_ratio)
# P TP > D TP case, D reads from |tp_ratio| remote workers.
tp_ratio = -tp_ratio
return [self.tp_rank * tp_ratio + i for i in range(tp_ratio)]
if self.tp_size < remote_tp_size:
for i in range(tp_ratio):
target_tp_rank_list.append(self.tp_rank * tp_ratio + i)
else:
target_tp_rank_list.append(self.tp_rank // tp_ratio)
target_rank_list = []
for pp_rank in target_pp_rank_list:
for tp_rank in target_tp_rank_list:
target_rank = pp_rank * remote_tp_size + tp_rank
target_rank_list.append((target_rank, pp_rank, tp_rank))
return target_rank_list
def get_target_remote_ranks_from_engine_id(
    self,
    remote_engine_id: EngineId,
) -> list[tuple[int, int, int]]:
    """
    Resolve the target remote ranks for a remote engine given its id.

    Looks up the tensor parallel and pipeline parallel sizes recorded for
    ``remote_engine_id`` and forwards both to ``get_target_remote_ranks``.

    Args:
        remote_engine_id: Key into ``self.remote_tp_size`` and
            ``self.remote_pp_size``.

    Returns:
        Whatever ``get_target_remote_ranks`` returns for those sizes: a
        list of ``(target_rank, pp_rank, tp_rank)`` tuples.

    Raises:
        Any lookup error raised by the two size mappings when the engine
        id is unknown (``KeyError`` if they are plain dicts).
    """
    remote_tp_size = self.remote_tp_size[remote_engine_id]
    remote_pp_size = self.remote_pp_size[remote_engine_id]
    # Both sizes are required: the single-argument call was superseded and
    # would fail against the two-parameter get_target_remote_ranks.
    return self.get_target_remote_ranks(remote_tp_size, remote_pp_size)
def get_current_attn_backend(vllm_config: VllmConfig):