[feature] Mooncake_connector support pcp/dcp (#4183)
add feature for Mooncake_connector supporting pcp/dcp
- vLLM version: v0.11.0
- vLLM main:
2918c1b49c
---------
Signed-off-by: wangxiaochao <w00642655@china.huawei.com>
Co-authored-by: wangxiaochao <w00642655@china.huawei.com>
This commit is contained in:
@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import msgspec
|
||||
import zmq
|
||||
from vllm.distributed.parallel_state import GroupCoordinator
|
||||
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
@@ -94,7 +95,8 @@ class TestKVCacheSendingThreadInit(unittest.TestCase):
|
||||
'side_channel_port': 5555,
|
||||
'metadata': MagicMock(),
|
||||
'ready_event': threading.Event(),
|
||||
'kv_caches': kv_caches
|
||||
'kv_caches': kv_caches,
|
||||
'pcp_rank': 0
|
||||
}
|
||||
self.threads = []
|
||||
|
||||
@@ -139,7 +141,8 @@ class TestGetAndClearFinishedRequests(unittest.TestCase):
|
||||
"test": "metadata"
|
||||
},
|
||||
'ready_event': threading.Event(),
|
||||
'kv_caches': kv_caches
|
||||
'kv_caches': kv_caches,
|
||||
'pcp_rank': 0
|
||||
}
|
||||
self.thread = KVCacheSendingThread(**self.common_args)
|
||||
|
||||
@@ -174,7 +177,8 @@ class TestKVCacheSendingThread(unittest.TestCase):
|
||||
side_channel_port=free_port,
|
||||
metadata=metadata,
|
||||
ready_event=ready_event,
|
||||
kv_caches={})
|
||||
kv_caches={},
|
||||
pcp_rank=0)
|
||||
thread.start()
|
||||
self.assertTrue(ready_event.wait(timeout=3),
|
||||
"Server thread startup timeout")
|
||||
@@ -617,7 +621,9 @@ class TestMooncakeConnectorMetadata(unittest.TestCase):
|
||||
"remote_block_ids": [4, 5, 6],
|
||||
"remote_engine_id": "remote_engine",
|
||||
"remote_host": "localhost",
|
||||
"remote_port": 5000
|
||||
"remote_port": 5000,
|
||||
"remote_pcp_size": 1,
|
||||
"remote_dcp_size": 1
|
||||
})
|
||||
|
||||
self.assertEqual(len(meta.requests), 1)
|
||||
@@ -663,7 +669,9 @@ class TestMooncakeConnectorSchedulerMatchedTokens(unittest.TestCase):
|
||||
"remote_block_ids": [1, 2, 3],
|
||||
"remote_engine_id": "remote",
|
||||
"remote_host": "localhost",
|
||||
"remote_port": 5000
|
||||
"remote_port": 5000,
|
||||
"remote_pcp_size": 1,
|
||||
"remote_dcp_size": 1
|
||||
}
|
||||
|
||||
meta = self.scheduler.build_connector_meta(MagicMock())
|
||||
@@ -1018,6 +1026,12 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
|
||||
self.mock_transfer_engine.get_rpc_port.return_value = 9090
|
||||
self.mock_transfer_engine.initialize.return_value = 0
|
||||
self.mock_transfer_engine.register_memory.return_value = 0
|
||||
self.mock_dcp_group = MagicMock(spec=GroupCoordinator)
|
||||
self.mock_dcp_group.rank_in_group = 0
|
||||
self.mock_dcp_group.world_size = 1
|
||||
self.mock_dcp_group.device_group = MagicMock()
|
||||
self.mock_dcp = MagicMock()
|
||||
self.mock_dcp.world_size = 1
|
||||
|
||||
self.patches = [
|
||||
patch(
|
||||
@@ -1051,6 +1065,13 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
|
||||
MagicMock()),
|
||||
patch('vllm_ascend.distributed.mooncake_connector.threading.Event',
|
||||
MagicMock()),
|
||||
patch('vllm.distributed.parallel_state.get_dcp_group',
|
||||
return_value=self.mock_dcp_group),
|
||||
patch('vllm.distributed.parallel_state._DCP',
|
||||
return_value=self.mock_dcp),
|
||||
patch(
|
||||
'vllm.distributed.get_decode_context_model_parallel_world_size',
|
||||
return_value=1)
|
||||
]
|
||||
|
||||
for p in self.patches:
|
||||
|
||||
Reference in New Issue
Block a user