[HiCacheStorage] fix abort request host memory leaks (#9874)

Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
huangtingwei
2025-09-02 09:59:29 +08:00
committed by GitHub
parent 9db8025376
commit cb9e0e4180
2 changed files with 22 additions and 3 deletions

View File

@@ -2403,6 +2403,9 @@ class Scheduler(
# This only works for requests that have not started anything.
# We still need to send something back to TokenizerManager to clean up the state.
req = self.waiting_queue.pop(i)
if self.enable_hicache_storage:
# Release the prefetch events associated with the aborted request so their host memory is not leaked.
self.tree_cache.release_aborted_request(req.rid)
self.send_to_tokenizer.send_pyobj(AbortReq(req.rid))
# For disaggregation decode mode, the request in the waiting queue has KV cache allocated.
if self.disaggregation_mode == DisaggregationMode.DECODE: