[HiCache] resolve conflict between chunked-prefill and hicache hit count (#9776)
This commit is contained in:
@@ -1503,7 +1503,7 @@ class Scheduler(
|
||||
# Move the chunked request out of the batch so that we can merge
|
||||
# only finished requests to running_batch.
|
||||
chunked_req_to_exclude.add(self.chunked_req)
|
||||
- self.tree_cache.cache_unfinished_req(self.chunked_req)
|
||||
+ self.tree_cache.cache_unfinished_req(self.chunked_req, chunked=True)
|
||||
# chunked request keeps its rid but will get a new req_pool_idx
|
||||
self.req_to_token_pool.free(self.chunked_req.req_pool_idx)
|
||||
if self.last_batch and self.last_batch.forward_mode.is_extend():
|
||||
|
||||
Reference in New Issue
Block a user