[Auto Sync] Update activation.py, chunk_cache.py, utils.py (20250917) (#10538)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Lianmin Zheng
2025-09-16 17:06:43 -07:00
committed by GitHub
parent c49484a658
commit f949ad5794
3 changed files with 15 additions and 9 deletions

View File

@@ -28,6 +28,13 @@ class ChunkCache(BasePrefixCache):
self.token_to_kv_pool_allocator = token_to_kv_pool_allocator
self.page_size = page_size
# NOTE (csy): this determines whether a cache has the prefix-matching feature.
# Chunk cache always returns True to indicate that it does no prefix matching.
# TODO (csy): Use a prefix cache trait to replace this.
@property
def disable(self):
# Read-only flag that is unconditionally True for this cache class;
# it takes no input and depends on no instance state.
return True
def reset(self):
# No-op: the body performs no work and returns None.
pass
@@ -38,7 +45,7 @@ class ChunkCache(BasePrefixCache):
last_host_node=None,
)
def cache_finished_req(self, req: Req):
def cache_finished_req(self, req: Req, insert: bool = True):
kv_indices = self.req_to_token_pool.req_to_token[
req.req_pool_idx,
# For decode server: if req.output_ids is empty, we want to free all req.origin_input_ids