Add grouped free operations (#1706)

This commit is contained in:
Lianmin Zheng
2024-10-18 13:21:05 -07:00
committed by GitHub
parent 392f2863c8
commit bc12d4033f
2 changed files with 23 additions and 4 deletions

View File

@@ -834,6 +834,8 @@ class Scheduler:
next_token_ids = self.resolve_next_token_ids(bid, next_token_ids)
self.token_to_kv_pool.free_group_begin()
# Check finish condition
for i, (req, next_token_id) in enumerate(zip(batch.reqs, next_token_ids)):
if self.server_args.enable_overlap_schedule and req.finished():
@@ -860,6 +862,8 @@ class Scheduler:
self.stream_output(batch.reqs)
self.token_to_kv_pool.free_group_end()
self.decode_forward_ct = (self.decode_forward_ct + 1) % (1 << 30)
if self.tp_rank == 0 and self.decode_forward_ct % 40 == 0:
self.print_decode_stats()