[HiCache] resolve conflict between chunked-prefill and hicache hit count (#9776)

This commit is contained in:
Zhiqiang Xie
2025-08-29 10:30:54 -07:00
committed by GitHub
parent e5b29bf14e
commit 54e872d343
8 changed files with 20 additions and 17 deletions

View File

@@ -567,7 +567,7 @@ class SchedulerDisaggregationPrefillMixin:
# Move the chunked request out of the batch so that we can merge
# only finished requests to running_batch.
self.last_batch.filter_batch(chunked_req_to_exclude=self.chunked_req)
- self.tree_cache.cache_unfinished_req(self.chunked_req)
+ self.tree_cache.cache_unfinished_req(self.chunked_req, chunked=True)
if self.enable_overlap:
# Delay KV transfer to process_batch_result_disagg_prefill when overlap is enabled to ensure results are resolved
self.chunked_req.tmp_end_idx = min(