diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py index 4c692e33..ed48d0b5 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py @@ -587,6 +587,7 @@ class KVCacheRecvingThread(threading.Thread): block_size = self.vllm_config.cache_config.block_size num_kv_head = max(self.model_config.hf_text_config.num_key_value_heads // self.tp_size, 1) layers = self.model_config.hf_text_config.num_hidden_layers + layers = len(self.kv_caches) flat_block_ids = [item for sublist in block_ids for item in sublist] block_ids_tensor = torch.tensor(flat_block_ids, dtype=torch.int64, device=device)