diff --git a/python/sglang/srt/managers/cache_controller.py b/python/sglang/srt/managers/cache_controller.py index d05433339..89fb00da4 100644 --- a/python/sglang/srt/managers/cache_controller.py +++ b/python/sglang/srt/managers/cache_controller.py @@ -636,6 +636,7 @@ class HiCacheController: key_strs, buffer_ptrs, buffer_sizes = self.mem_pool_host.get_buffer_meta( hash_values, host_indices, + self.storage_config.tp_rank, ) get_result = self.storage_backend.batch_get( key_strs, @@ -838,6 +839,7 @@ class HiCacheController: key_strs, buffer_ptrs, buffer_sizes = self.mem_pool_host.get_buffer_meta( hash_values, host_indices, + self.storage_config.tp_rank, ) success = self.storage_backend.batch_set( key_strs, diff --git a/python/sglang/srt/mem_cache/memory_pool_host.py b/python/sglang/srt/mem_cache/memory_pool_host.py index 080ee458d..127c2a072 100644 --- a/python/sglang/srt/mem_cache/memory_pool_host.py +++ b/python/sglang/srt/mem_cache/memory_pool_host.py @@ -7,7 +7,6 @@ from functools import wraps import psutil import torch -from sglang.srt.distributed import get_tensor_model_parallel_rank from sglang.srt.mem_cache.memory_pool import KVCache, MHATokenToKVPool, MLATokenToKVPool from sglang.srt.utils import is_npu @@ -464,8 +463,7 @@ class MHATokenToKVPoolHost(HostKVCache): else: raise ValueError(f"Unsupported layout: {self.layout}") - def get_buffer_meta(self, keys, indices): - local_rank = get_tensor_model_parallel_rank() + def get_buffer_meta(self, keys, indices, local_rank): ptr_list = [] key_list = [] kv_buffer_data_ptr = self.kv_buffer.data_ptr() @@ -704,7 +702,7 @@ class MLATokenToKVPoolHost(HostKVCache): else: raise ValueError(f"Unsupported layout: {self.layout}") - def get_buffer_meta(self, keys, indices): + def get_buffer_meta(self, keys, indices, local_rank): ptr_list = [] key_list = [] kv_buffer_data_ptr = self.kv_buffer.data_ptr()