[Auto Sync] Update activation.py, chunk_cache.py, utils.py (20250917) (#10538)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
@@ -28,6 +28,13 @@ class ChunkCache(BasePrefixCache):
|
||||
self.token_to_kv_pool_allocator = token_to_kv_pool_allocator
|
||||
self.page_size = page_size
|
||||
|
||||
# NOTE (csy): this is used to determine whether a cache has the prefix matching feature.
|
||||
# Chunk cache always returns True to indicate no prefix matching.
|
||||
# TODO (csy): Use a prefix cache trait to replace this.
|
||||
@property
|
||||
def disable(self):
|
||||
return True
|
||||
|
||||
def reset(self):
|
||||
pass
|
||||
|
||||
@@ -38,7 +45,7 @@ class ChunkCache(BasePrefixCache):
|
||||
last_host_node=None,
|
||||
)
|
||||
|
||||
def cache_finished_req(self, req: Req):
|
||||
def cache_finished_req(self, req: Req, insert: bool = True):
|
||||
kv_indices = self.req_to_token_pool.req_to_token[
|
||||
req.req_pool_idx,
|
||||
# For decode server: if req.output_ids is empty, we want to free all req.origin_input_ids
|
||||
|
||||
Reference in New Issue
Block a user