Query remaining memory dynamically for PrefillAdder (#2941)

This commit is contained in:
Zhiqiang Xie
2025-01-17 20:20:26 -08:00
committed by GitHub
parent d3024f4fc8
commit 8af7048dcf
2 changed files with 27 additions and 12 deletions

View File

@@ -891,9 +891,9 @@ class Scheduler:
  # Prefill policy
  adder = PrefillAdder(
  self.tree_cache,
+ self.token_to_kv_pool,
  self.running_batch,
  self.new_token_ratio,
- self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size(),
  self.max_prefill_tokens,
  self.chunked_prefill_size,
  running_bs if self.is_mixed_chunk else 0,