[PD] Fix dynamic port support and MLA buffer for Mooncake (#5415)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
Co-authored-by: ybyang <ybyang7@iflytek.com>
This commit is contained in:
@@ -67,6 +67,7 @@ class PrefillBootstrapQueue:
|
||||
bootstrap_port: int,
|
||||
gloo_group: ProcessGroup,
|
||||
transfer_backend: TransferBackend,
|
||||
+scheduler: Scheduler,
|
||||
):
|
||||
self.token_to_kv_pool = token_to_kv_pool
|
||||
self.aux_dtype = aux_dtype
|
||||
@@ -76,6 +77,7 @@ class PrefillBootstrapQueue:
|
||||
self.tp_rank = tp_rank
|
||||
self.tp_size = tp_size
|
||||
self.transfer_backend = transfer_backend
|
||||
+self.scheduler = scheduler
|
||||
self.kv_manager = self._init_kv_manager()
|
||||
self.queue: List[Req] = []
|
||||
self.gloo_group = gloo_group
|
||||
@@ -108,8 +110,11 @@ class PrefillBootstrapQueue:
|
||||
metadata_buffer[0].nbytes for metadata_buffer in self.metadata_buffers
|
||||
]
|
||||
kv_args.ib_device = "mock-ib-device"
|
||||
+kv_args.gpu_id = self.scheduler.gpu_id
|
||||
kv_manager_class = get_kv_class(self.transfer_backend, KVClassType.MANAGER)
|
||||
-kv_manager = kv_manager_class(kv_args, DisaggregationMode.PREFILL)
|
||||
+kv_manager = kv_manager_class(
|
||||
+kv_args, DisaggregationMode.PREFILL, self.scheduler.server_args
|
||||
+)
|
||||
return kv_manager
|
||||
|
||||
def add(self, req: Req) -> None:
|
||||
|
||||
Reference in New Issue
Block a user