[PD] Support prefill overlap + Ensure no race condition (#5609)
This commit is contained in:
@@ -539,6 +539,11 @@ class Req:
|
||||
# The first output_id transferred from prefill instance.
|
||||
self.transferred_output_id: Optional[int] = None
|
||||
|
||||
# With the overlap schedule, the KV transfer is delayed until `process_batch_result_disagg_prefill`; the non-overlap schedule performs it in `process_prefill_chunk`.
|
||||
# The delay is needed because the KV cache is not yet ready when `process_prefill_chunk` runs.
|
||||
# We use `tmp_end_idx` to store the end index of the kv cache to send.
|
||||
self.tmp_end_idx: int = -1
|
||||
|
||||
@property
|
||||
def seqlen(self):
|
||||
return len(self.origin_input_ids) + len(self.output_ids)
|
||||
|
||||
@@ -2014,7 +2014,10 @@ def run_scheduler_process(
|
||||
else:
|
||||
scheduler.event_loop_normal()
|
||||
elif disaggregation_mode == DisaggregationMode.PREFILL:
|
||||
scheduler.event_loop_normal_disagg_prefill()
|
||||
if scheduler.enable_overlap:
|
||||
scheduler.event_loop_overlap_disagg_prefill()
|
||||
else:
|
||||
scheduler.event_loop_normal_disagg_prefill()
|
||||
elif disaggregation_mode == DisaggregationMode.DECODE:
|
||||
if scheduler.enable_overlap:
|
||||
scheduler.event_loop_overlap_disagg_decode()
|
||||
|
||||
Reference in New Issue
Block a user