[Generative Score API] Optimization to Remove Decode. (#8840)

This commit is contained in:
Sundara Raman Ramachandran
2025-08-13 14:12:24 -07:00
committed by GitHub
parent 9e426466af
commit a027a9b4b3
6 changed files with 843 additions and 20 deletions

View File

@@ -1466,8 +1466,9 @@ class Scheduler(
if self.last_batch.batch_size() < last_bs:
self.running_batch.batch_is_full = False
-# Merge the new batch into the running batch
-if not self.last_batch.is_empty():
+# Merge the new batch into the running batch.
+# For prefill-only batch, we can avoid going through decoding step.
+if not self.last_batch.is_empty() and not self.last_batch.is_prefill_only:
if self.running_batch.is_empty():
self.running_batch = self.last_batch
else: