[Refactor] Clean up radix cache related API (#7303)

Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
DarkSharpness
2025-06-19 09:58:48 -07:00
committed by GitHub
parent 650127a173
commit 47367b768d
7 changed files with 153 additions and 122 deletions

View File

@@ -1467,15 +1467,14 @@ class Scheduler(
return None
if self.enable_hierarchical_cache:
# check for completion of hierarchical cache activities to release memory
self.tree_cache.writing_check()
self.tree_cache.loading_check()
self.tree_cache.check_hicache_events()
# Get priority queue
prefix_computed = self.policy.calc_priority(self.waiting_queue)
self.policy.calc_priority(self.waiting_queue)
# Prefill policy
adder = PrefillAdder(
self.page_size,
self.tree_cache,
self.token_to_kv_pool_allocator,
self.running_batch,
@@ -1517,19 +1516,8 @@ class Scheduler(
self.running_batch.batch_is_full = True
break
# bypass prefix_computed if enable_hierarchical_cache
req.init_next_round_input(
(
None
if (prefix_computed and not self.enable_hierarchical_cache)
else self.tree_cache
),
self.enable_hierarchical_cache,
)
res = adder.add_one_req(
req, self.chunked_req, self.enable_hierarchical_cache
)
req.init_next_round_input(self.tree_cache)
res = adder.add_one_req(req, has_chunked_req=(self.chunked_req is not None))
if res != AddReqResult.CONTINUE:
if res == AddReqResult.NO_TOKEN:
@@ -1581,7 +1569,9 @@ class Scheduler(
)
if self.enable_hierarchical_cache:
# todo (zhiqiang): disable cuda graph execution if hicache loading triggered
new_batch.hicache_consumer_index = self.tree_cache.ready_to_load_cache()
new_batch.hicache_consumer_index = (
self.tree_cache.ready_to_load_host_cache()
)
new_batch.prepare_for_extend()