[PD] Fix failure abort (#6535)

This commit is contained in:
Byron Hsu
2025-05-22 20:32:03 -07:00
committed by GitHub
parent a6970a17f3
commit 0a4fc73b48
6 changed files with 141 additions and 92 deletions

View File

@@ -38,7 +38,9 @@ class ChunkCache(BasePrefixCache):
def cache_finished_req(self, req: Req):
kv_indices = self.req_to_token_pool.req_to_token[
req.req_pool_idx, : len(req.origin_input_ids) + len(req.output_ids) - 1
req.req_pool_idx,
# For decode server: if req.output_ids is empty, we want to free all req.origin_input_ids
: len(req.origin_input_ids) + max(len(req.output_ids) - 1, 0),
]
self.req_to_token_pool.free(req.req_pool_idx)
self.token_to_kv_pool_allocator.free(kv_indices)