Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -413,7 +413,20 @@ class TpKVTopology:
|
||||
f"by local tensor parallel size {self.tp_size}."
|
||||
)
|
||||
# P TP > D TP case, return the ratio as negative
|
||||
return -remote_tp_size // self.tp_size
|
||||
return remote_tp_size // self.tp_size
|
||||
|
||||
def pp_ratio(
    self,
    remote_pp_size: int,
) -> int:
    """
    Calculate the pipeline parallel ratio between local and remote PP.

    One of the two sizes must evenly divide the other; the result is the
    quotient of that division, regardless of which side is larger.

    Args:
        remote_pp_size: Pipeline parallel size of the remote engine.

    Returns:
        `self.pp_size // remote_pp_size` when the remote size divides the
        local size, otherwise `remote_pp_size // self.pp_size`.

    Raises:
        AssertionError: If neither size is a multiple of the other.
    """
    local_pp_size = self.pp_size
    local_divisible = local_pp_size % remote_pp_size == 0
    # Report the *pipeline* parallel size in the message (not the tensor
    # parallel size), so a failure points at the value that was checked.
    assert local_divisible or remote_pp_size % local_pp_size == 0, (
        f"Local pipeline parallel size {local_pp_size} is not divisible "
        f"by remote pipeline parallel size {remote_pp_size} or vice versa."
    )
    if local_divisible:
        return local_pp_size // remote_pp_size
    return remote_pp_size // local_pp_size
|
||||
|
||||
def block_size_ratio(
|
||||
self,
|
||||
@@ -457,6 +470,7 @@ class TpKVTopology:
|
||||
def get_target_remote_ranks(
|
||||
self,
|
||||
remote_tp_size: int,
|
||||
remote_pp_size: int
|
||||
) -> list[int]:
|
||||
"""
|
||||
Get the remote TP rank (on P) that the current local TP rank
|
||||
@@ -464,19 +478,36 @@ class TpKVTopology:
|
||||
read from multiple remote ranks.
|
||||
"""
|
||||
tp_ratio = self.tp_ratio(remote_tp_size)
|
||||
if tp_ratio > 0:
|
||||
return [self.tp_rank // tp_ratio]
|
||||
pp_ratio = self.pp_ratio(remote_pp_size)
|
||||
target_pp_rank_list = []
|
||||
target_tp_rank_list = []
|
||||
if self.pp_size < remote_pp_size:
|
||||
for i in range(pp_ratio):
|
||||
target_pp_rank_list.append(self.pp_rank * pp_ratio + i)
|
||||
else:
|
||||
target_pp_rank_list.append(self.pp_rank // pp_ratio)
|
||||
|
||||
# P TP > D TP case, D reads from |tp_ratio| remote workers.
|
||||
tp_ratio = -tp_ratio
|
||||
return [self.tp_rank * tp_ratio + i for i in range(tp_ratio)]
|
||||
if self.tp_size < remote_tp_size:
|
||||
for i in range(tp_ratio):
|
||||
target_tp_rank_list.append(self.tp_rank * tp_ratio + i)
|
||||
else:
|
||||
target_tp_rank_list.append(self.tp_rank // tp_ratio)
|
||||
|
||||
target_rank_list = []
|
||||
for pp_rank in target_pp_rank_list:
|
||||
for tp_rank in target_tp_rank_list:
|
||||
target_rank = pp_rank * remote_tp_size + tp_rank
|
||||
target_rank_list.append((target_rank, pp_rank, tp_rank))
|
||||
|
||||
return target_rank_list
|
||||
|
||||
def get_target_remote_ranks_from_engine_id(
    self,
    remote_engine_id: EngineId,
) -> list[int]:
    """
    Resolve the target remote ranks for a remote engine by its id.

    Looks up the tensor and pipeline parallel sizes recorded for
    `remote_engine_id` and forwards both to `get_target_remote_ranks`.

    Args:
        remote_engine_id: Key into `self.remote_tp_size` and
            `self.remote_pp_size`.

    Returns:
        Whatever `get_target_remote_ranks` returns for those sizes.

    Raises:
        KeyError: If the engine id is missing from either mapping
            (assuming both are plain dicts -- TODO confirm).
    """
    remote_tp_size = self.remote_tp_size[remote_engine_id]
    remote_pp_size = self.remote_pp_size[remote_engine_id]
    # Both sizes must be passed: get_target_remote_ranks takes
    # remote_pp_size as a required parameter.
    return self.get_target_remote_ranks(remote_tp_size, remote_pp_size)
|
||||
|
||||
|
||||
def get_current_attn_backend(vllm_config: VllmConfig):
|
||||
|
||||
Reference in New Issue
Block a user