[Core] In-batch prefix caching by delay scheduling (#2442)

This commit is contained in:
SangBin Cho
2024-12-11 12:51:50 -08:00
committed by GitHub
parent 864bf2ba00
commit 9208618b3e
8 changed files with 87 additions and 16 deletions

View File

@@ -713,7 +713,7 @@ class Scheduler:
if crash_on_warnings():
raise ValueError(msg)
def get_next_batch_to_run(self):
def get_next_batch_to_run(self) -> Optional[ScheduleBatch]:
# Merge the prefill batch into the running batch
if self.last_batch and self.last_batch.forward_mode.is_extend():
if self.being_chunked_req: