diff --git a/python/sglang/srt/mem_cache/chunk_cache.py b/python/sglang/srt/mem_cache/chunk_cache.py index 54626dffd..34cd2083f 100644 --- a/python/sglang/srt/mem_cache/chunk_cache.py +++ b/python/sglang/srt/mem_cache/chunk_cache.py @@ -27,6 +27,10 @@ class ChunkCache(BasePrefixCache): self.req_to_token_pool = req_to_token_pool self.token_to_kv_pool_allocator = token_to_kv_pool_allocator self.page_size = page_size + if self.token_to_kv_pool_allocator: + self.device = self.token_to_kv_pool_allocator.device + else: + self.device = torch.device("cpu") # NOTE (csy): this is to determine if a cache has prefix matching feature. # Chunk cache always return True to indicate no prefix matching.