Fix EAGLE hang when max_new_tokens=1 (#4185)

This commit is contained in:
Ke Bao
2025-03-08 04:11:18 +08:00
committed by GitHub
parent 70866b6f4f
commit 20c8119915

View File

@@ -957,7 +957,11 @@ class Scheduler:
self.req_to_token_pool.free(self.chunked_req.req_pool_idx)
self.batch_is_full = False
last_bs = self.last_batch.batch_size()
self.last_batch.filter_batch()
if self.last_batch.batch_size() < last_bs:
self.batch_is_full = False
if not self.last_batch.is_empty():
if self.running_batch is None:
self.running_batch = self.last_batch