From c3b1d409a9c589a478b7080a4ae4faf2d1c6c2b3 Mon Sep 17 00:00:00 2001 From: liziyu <56102866+liziyu179@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:16:37 +0800 Subject: [PATCH] [BugFix] [P/D] [CherryPick] 8540 In scenarios where the TP sizes are not equal, the KV cache at the MTP layer is not handled. (#8541) ### What this PR does / why we need it? Fix the issue where the Mooncake connector does not handle the MTP layer KV cache when TP is unbalanced. backport: #8540 ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? by the nightly CI Signed-off-by: liziyu --- vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py index 4c692e33..ed48d0b5 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py @@ -587,6 +587,7 @@ class KVCacheRecvingThread(threading.Thread): block_size = self.vllm_config.cache_config.block_size num_kv_head = max(self.model_config.hf_text_config.num_key_value_heads // self.tp_size, 1) layers = self.model_config.hf_text_config.num_hidden_layers + layers = len(self.kv_caches) flat_block_ids = [item for sublist in block_ids for item in sublist] block_ids_tensor = torch.tensor(flat_block_ids, dtype=torch.int64, device=device)