[PD] Fix dynamic port support and MLA buffer for Mooncake (#5415)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
Co-authored-by: ybyang <ybyang7@iflytek.com>
This commit is contained in:
shangmingc
2025-04-15 19:29:31 +08:00
committed by GitHub
parent 471650dee0
commit ffde65a094
6 changed files with 171 additions and 180 deletions

View File

@@ -599,6 +599,7 @@ class Scheduler(
bootstrap_port=self.server_args.disaggregation_bootstrap_port,
gloo_group=self.tp_worker.get_attention_tp_cpu_group(),
transfer_backend=self.transfer_backend,
scheduler=self,
)
# The prefill requests that are in the middle of kv sending
self.disagg_prefill_inflight_queue: List[Req] = []