[feature] Mooncake_connector support pcp/dcp (#4183)

add feature for Mooncake_connector supporting pcp/dcp

- vLLM version: v0.11.0
- vLLM main:
2918c1b49c

---------

Signed-off-by: wangxiaochao <w00642655@china.huawei.com>
Co-authored-by: wangxiaochao <w00642655@china.huawei.com>
This commit is contained in:
wangxiaochao
2025-11-18 10:17:48 +08:00
committed by GitHub
parent 10a046ddce
commit 0d04ad8c8f
2 changed files with 180 additions and 37 deletions

View File

@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
import msgspec
import zmq
from vllm.distributed.parallel_state import GroupCoordinator
from vllm_ascend.utils import vllm_version_is
@@ -94,7 +95,8 @@ class TestKVCacheSendingThreadInit(unittest.TestCase):
'side_channel_port': 5555,
'metadata': MagicMock(),
'ready_event': threading.Event(),
'kv_caches': kv_caches
'kv_caches': kv_caches,
'pcp_rank': 0
}
self.threads = []
@@ -139,7 +141,8 @@ class TestGetAndClearFinishedRequests(unittest.TestCase):
"test": "metadata"
},
'ready_event': threading.Event(),
'kv_caches': kv_caches
'kv_caches': kv_caches,
'pcp_rank': 0
}
self.thread = KVCacheSendingThread(**self.common_args)
@@ -174,7 +177,8 @@ class TestKVCacheSendingThread(unittest.TestCase):
side_channel_port=free_port,
metadata=metadata,
ready_event=ready_event,
kv_caches={})
kv_caches={},
pcp_rank=0)
thread.start()
self.assertTrue(ready_event.wait(timeout=3),
"Server thread startup timeout")
@@ -617,7 +621,9 @@ class TestMooncakeConnectorMetadata(unittest.TestCase):
"remote_block_ids": [4, 5, 6],
"remote_engine_id": "remote_engine",
"remote_host": "localhost",
"remote_port": 5000
"remote_port": 5000,
"remote_pcp_size": 1,
"remote_dcp_size": 1
})
self.assertEqual(len(meta.requests), 1)
@@ -663,7 +669,9 @@ class TestMooncakeConnectorSchedulerMatchedTokens(unittest.TestCase):
"remote_block_ids": [1, 2, 3],
"remote_engine_id": "remote",
"remote_host": "localhost",
"remote_port": 5000
"remote_port": 5000,
"remote_pcp_size": 1,
"remote_dcp_size": 1
}
meta = self.scheduler.build_connector_meta(MagicMock())
@@ -1018,6 +1026,12 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
self.mock_transfer_engine.get_rpc_port.return_value = 9090
self.mock_transfer_engine.initialize.return_value = 0
self.mock_transfer_engine.register_memory.return_value = 0
self.mock_dcp_group = MagicMock(spec=GroupCoordinator)
self.mock_dcp_group.rank_in_group = 0
self.mock_dcp_group.world_size = 1
self.mock_dcp_group.device_group = MagicMock()
self.mock_dcp = MagicMock()
self.mock_dcp.world_size = 1
self.patches = [
patch(
@@ -1051,6 +1065,13 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
MagicMock()),
patch('vllm_ascend.distributed.mooncake_connector.threading.Event',
MagicMock()),
patch('vllm.distributed.parallel_state.get_dcp_group',
return_value=self.mock_dcp_group),
patch('vllm.distributed.parallel_state._DCP',
return_value=self.mock_dcp),
patch(
'vllm.distributed.get_decode_context_model_parallel_world_size',
return_value=1)
]
for p in self.patches: