[PD] Add support for different TP sizes per DP rank (#5922)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
shangmingc
2025-05-13 04:55:42 +08:00
committed by GitHub
parent 983c663de6
commit f1c896007a
8 changed files with 396 additions and 119 deletions

View File

@@ -34,6 +34,7 @@ from sglang.srt.disaggregation.utils import (
ReqToMetadataIdxAllocator,
TransferBackend,
get_kv_class,
is_mla_backend,
kv_to_page_indices,
kv_to_page_num,
poll_and_all_reduce,
@@ -69,6 +70,7 @@ class PrefillBootstrapQueue:
scheduler: Scheduler,
):
self.token_to_kv_pool = token_to_kv_pool
self.is_mla_backend = is_mla_backend(token_to_kv_pool)
self.aux_dtype = aux_dtype
self.metadata_buffers = metadata_buffers
@@ -112,7 +114,10 @@ class PrefillBootstrapQueue:
kv_args.gpu_id = self.scheduler.gpu_id
kv_manager_class = get_kv_class(self.transfer_backend, KVClassType.MANAGER)
kv_manager = kv_manager_class(
kv_args, DisaggregationMode.PREFILL, self.scheduler.server_args
kv_args,
DisaggregationMode.PREFILL,
self.scheduler.server_args,
self.is_mla_backend,
)
return kv_manager