diff --git a/python/sglang/srt/managers/router/radix_cache.py b/python/sglang/srt/managers/router/radix_cache.py index 855a10bb6..5309a4265 100644 --- a/python/sglang/srt/managers/router/radix_cache.py +++ b/python/sglang/srt/managers/router/radix_cache.py @@ -58,7 +58,7 @@ class RadixCache: def insert(self, key, value=None): if self.disable: - return len(key) + return 0 if value is None: value = [x for x in key] @@ -76,6 +76,12 @@ class RadixCache: indices = self.req_to_token_pool.req_to_token[req_pool_idx, : len(token_ids)] new_prefix_len = self.insert(token_ids, indices.clone()) + if self.disable: + if del_in_memory_pool: + self.token_to_kv_pool.dec_refs(indices) + else: + return torch.tensor([], dtype=torch.int64), self.root_node + # Radix Cache takes one ref in memory pool self.token_to_kv_pool.dec_refs(indices[last_uncached_pos:new_prefix_len])