[Recover] [Bugfix] support mtp kv transfer and pp partition by hand in kv transfer (#4892) (revert in #4981) (#5511)

PR #4892 was revert in #4981, we recover it now. For the potential bug
break deepseek3.2 in PD case, we will find it out and fix it.

- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1

---------

Signed-off-by: lidenghui <lidenghui1110@gmail.com>
This commit is contained in:
lidenghui1110
2026-01-04 16:49:33 +08:00
committed by GitHub
parent 7c210225a2
commit d462577504
2 changed files with 72 additions and 15 deletions

View File

@@ -246,7 +246,8 @@ class TestKVCacheRecvingThreadBasic(unittest.TestCase):
block_len=[1024, 2048],
ready_event=self.ready_event,
vllm_config=self.vllm_config,
kv_caches=self.kv_caches)
kv_caches=self.kv_caches,
prefill_pp_layer_partition=None)
def test_add_request(self):
test_req = {
@@ -300,7 +301,8 @@ class TestSocketManagement(unittest.TestCase):
block_len=[1024, 2048],
ready_event=self.ready_event,
vllm_config=self.vllm_config,
kv_caches=self.kv_caches)
kv_caches=self.kv_caches,
prefill_pp_layer_partition=None)
self.thread.remote_sockets = defaultdict(deque)
self.thread.remote_poller = MagicMock()
@@ -358,7 +360,8 @@ class TestCoreFunctionality(unittest.TestCase):
block_len=[1024, 2048],
ready_event=self.ready_event,
vllm_config=self.vllm_config,
kv_caches=self.kv_caches)
kv_caches=self.kv_caches,
prefill_pp_layer_partition=None)
self.thread.request_queue = self.mock_queue
self.test_req = {
"request_id": "req1",
@@ -444,7 +447,8 @@ class TestMetadataHandling(unittest.TestCase):
block_len=[1024, 2048],
ready_event=self.ready_event,
vllm_config=self.vllm_config,
kv_caches=self.kv_caches)
kv_caches=self.kv_caches,
prefill_pp_layer_partition=None)
self.test_metadata = MooncakeAgentMetadata(
engine_id="remote_engine",
te_rpc_port=9090,
@@ -509,7 +513,8 @@ class TestMainThreadLoop(unittest.TestCase):
block_len=[1024, 2048],
ready_event=self.ready_event,
vllm_config=self.vllm_config,
kv_caches=self.kv_caches)
kv_caches=self.kv_caches,
prefill_pp_layer_partition=None)
self.thread.request_queue = queue.Queue()
@patch.object(KVCacheRecvingThread, '_handle_request')
@@ -546,6 +551,7 @@ class MockVllmConfig:
self.parallel_config = MagicMock()
self.cache_config = MagicMock()
self.kv_transfer_config = MagicMock()
self.speculative_config = MagicMock()
self.model_config.use_mla = True
self.parallel_config.tensor_parallel_size = 2
self.parallel_config.data_parallel_rank = 0