[PD] Add support for different TP sizes per DP rank (#5922)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -34,6 +34,7 @@ from sglang.srt.disaggregation.utils import (
|
||||
ReqToMetadataIdxAllocator,
|
||||
TransferBackend,
|
||||
get_kv_class,
|
||||
+ is_mla_backend,
|
||||
kv_to_page_indices,
|
||||
kv_to_page_num,
|
||||
poll_and_all_reduce,
|
||||
@@ -69,6 +70,7 @@ class PrefillBootstrapQueue:
|
||||
scheduler: Scheduler,
|
||||
):
|
||||
self.token_to_kv_pool = token_to_kv_pool
|
||||
+ self.is_mla_backend = is_mla_backend(token_to_kv_pool)
|
||||
self.aux_dtype = aux_dtype
|
||||
|
||||
self.metadata_buffers = metadata_buffers
|
||||
@@ -112,7 +114,10 @@ class PrefillBootstrapQueue:
|
||||
kv_args.gpu_id = self.scheduler.gpu_id
|
||||
kv_manager_class = get_kv_class(self.transfer_backend, KVClassType.MANAGER)
|
||||
kv_manager = kv_manager_class(
|
||||
- kv_args, DisaggregationMode.PREFILL, self.scheduler.server_args
+ kv_args,
+ DisaggregationMode.PREFILL,
+ self.scheduler.server_args,
+ self.is_mla_backend,
|
||||
)
|
||||
return kv_manager
|
||||
|
||||
|
||||
Reference in New Issue
Block a user