Fix mixed chunked prefill (#1850)

This commit is contained in:
Lianmin Zheng
2024-10-30 21:20:41 -07:00
committed by GitHub
parent a7a0a6886b
commit f7102fbd2b
3 changed files with 80 additions and 23 deletions

View File

@@ -720,9 +720,11 @@ class Scheduler:
         # Mixed-style chunked prefill
         if self.is_mixed_chunk and self.running_batch is not None:
-            self.running_batch.prepare_for_decode(self.enable_overlap)
-            new_batch.mix_with_running(self.running_batch)
-            new_batch.decoding_reqs = self.running_batch.reqs
+            self.running_batch.filter_batch()
+            if not self.running_batch.is_empty():
+                self.running_batch.prepare_for_decode(self.enable_overlap)
+                new_batch.mix_with_running(self.running_batch)
+                new_batch.decoding_reqs = self.running_batch.reqs
             self.running_batch = None
         else:
             new_batch.decoding_reqs = None