From 43de1d7304063ddb432a0990e65271d82f622e1f Mon Sep 17 00:00:00 2001 From: Zhiqiang Xie Date: Tue, 26 Aug 2025 10:49:40 -0700 Subject: [PATCH] HiCache Storage fix host memory leak (#9648) --- python/sglang/srt/managers/scheduler.py | 9 +++++---- python/sglang/srt/mem_cache/hiradix_cache.py | 2 ++ python/sglang/srt/mem_cache/radix_cache.py | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index 34c2b164c..f897a5dd4 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -1296,10 +1296,11 @@ class Scheduler( def _prefetch_kvcache(self, req: Req): if self.enable_hicache_storage: req.init_next_round_input(self.tree_cache) - last_hash = req.last_host_node.get_last_hash_value() - matched_len = len(req.prefix_indices) + req.host_hit_length - # todo, free-form fetching, calculating hash keys on the fly - if (matched_len > 0 and last_hash is not None) or matched_len == 0: + if req.last_node.backuped: + # only initiate the prefetch if the last node is backuped; + # otherwise, the allocated GPU memory must be locked for integrity + last_hash = req.last_host_node.get_last_hash_value() + matched_len = len(req.prefix_indices) + req.host_hit_length new_input_tokens = req.fill_ids[matched_len:] self.tree_cache.prefetch_from_storage( req.rid, req.last_host_node, new_input_tokens, last_hash diff --git a/python/sglang/srt/mem_cache/hiradix_cache.py b/python/sglang/srt/mem_cache/hiradix_cache.py index d4ff703ba..0df7fb537 100644 --- a/python/sglang/srt/mem_cache/hiradix_cache.py +++ b/python/sglang/srt/mem_cache/hiradix_cache.py @@ -536,6 +536,8 @@ class HiRadixCache(RadixCache): while last_node.evicted: host_hit_length += len(last_node.host_value) last_node = last_node.parent + while not last_host_node.backuped: + last_host_node = last_host_node.parent return MatchResult( device_indices=value, diff --git 
a/python/sglang/srt/mem_cache/radix_cache.py b/python/sglang/srt/mem_cache/radix_cache.py index 847a7dbbf..f6383b4ce 100644 --- a/python/sglang/srt/mem_cache/radix_cache.py +++ b/python/sglang/srt/mem_cache/radix_cache.py @@ -152,6 +152,7 @@ class RadixCache(BasePrefixCache): self.root_node = TreeNode() self.root_node.key = [] self.root_node.value = [] + self.root_node.host_value = [] self.root_node.lock_ref = 1 self.evictable_size_ = 0 self.protected_size_ = 0