[HiCache] resolve conflict between chunked-prefill and hicache hit count (#9776)

This commit is contained in:
Zhiqiang Xie
2025-08-29 10:30:54 -07:00
committed by GitHub
parent e5b29bf14e
commit 54e872d343
8 changed files with 20 additions and 17 deletions

View File

@@ -1503,7 +1503,7 @@ class Scheduler(
# Move the chunked request out of the batch so that we can merge
# only finished requests to running_batch.
chunked_req_to_exclude.add(self.chunked_req)
-self.tree_cache.cache_unfinished_req(self.chunked_req)
+self.tree_cache.cache_unfinished_req(self.chunked_req, chunked=True)
# chunked request keeps its rid but will get a new req_pool_idx
self.req_to_token_pool.free(self.chunked_req.req_pool_idx)
if self.last_batch and self.last_batch.forward_mode.is_extend():