[PD] Support prefill overlap + Ensure no race condition (#5609)

This commit is contained in:
Byron Hsu
2025-04-21 12:12:56 -07:00
committed by GitHub
parent e65b9f21e3
commit bf98d2e377
5 changed files with 107 additions and 18 deletions

View File

@@ -539,6 +539,11 @@ class Req:
# The first output_id transferred from prefill instance.
self.transferred_output_id: Optional[int] = None
# With the overlap schedule, the kv transfer is delayed until `process_batch_result_disagg_prefill`, instead of happening in `process_prefill_chunk` as in the non-overlap schedule.
# This is because the kv cache is not ready yet in `process_prefill_chunk`.
# `tmp_end_idx` stores the end index of the kv cache to send.
self.tmp_end_idx: int = -1
# Combined length of the request, exposed as a property:
# len(self.origin_input_ids) + len(self.output_ids).
@property
def seqlen(self):
return len(self.origin_input_ids) + len(self.output_ids)

View File

@@ -2014,7 +2014,10 @@ def run_scheduler_process(
else:
scheduler.event_loop_normal()
elif disaggregation_mode == DisaggregationMode.PREFILL:
scheduler.event_loop_normal_disagg_prefill()
if scheduler.enable_overlap:
scheduler.event_loop_overlap_disagg_prefill()
else:
scheduler.event_loop_normal_disagg_prefill()
elif disaggregation_mode == DisaggregationMode.DECODE:
if scheduler.enable_overlap:
scheduler.event_loop_overlap_disagg_decode()