[Generative Score API] Optimization to Remove Decode. (#8840)
This commit is contained in:
committed by
GitHub
parent
9e426466af
commit
a027a9b4b3
@@ -1466,8 +1466,9 @@ class Scheduler(
|
||||
if self.last_batch.batch_size() < last_bs:
|
||||
self.running_batch.batch_is_full = False
|
||||
|
||||
# Merge the new batch into the running batch
|
||||
if not self.last_batch.is_empty():
|
||||
# Merge the new batch into the running batch.
|
||||
# For prefill-only batch, we can avoid going through decoding step.
|
||||
if not self.last_batch.is_empty() and not self.last_batch.is_prefill_only:
|
||||
if self.running_batch.is_empty():
|
||||
self.running_batch = self.last_batch
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user