Remove overlap thread (#11210)

Co-authored-by: Lianmin Zheng <15100009+merrymercy@users.noreply.github.com>
Co-authored-by: Hanming Lu <69857889+hanming-lu@users.noreply.github.com>
This commit is contained in:
Liangsheng Yin
2025-10-07 20:12:12 +08:00
committed by GitHub
parent 24bc3fb0f9
commit 1519a89cfd
14 changed files with 280 additions and 184 deletions

View File

@@ -886,9 +886,6 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
# This is an optimization to reduce the overhead of the prefill check.
batch_is_full: bool = False
-# Events
-launch_done: Optional[threading.Event] = None
# For chunked prefill in PP
chunked_req: Optional[Req] = None
@@ -1877,7 +1874,6 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
)
),
extend_input_logprob_token_ids=self.extend_input_logprob_token_ids,
-launch_done=self.launch_done,
is_prefill_only=self.is_prefill_only,
)
@@ -2018,8 +2014,8 @@ class ModelWorkerBatch:
capture_hidden_mode: CaptureHiddenMode = None
hicache_consumer_index: int = -1
-# Overlap event
-launch_done: Optional[threading.Event] = None
+# Overlap scheduler related
+delay_sample_launch: bool = False
# Whether this batch is prefill-only (no token generation needed)
is_prefill_only: bool = False