From 0a27705917e64993a8a76198ac6e30980578fe60 Mon Sep 17 00:00:00 2001 From: zxr2333 <64738772+nwpu-zxr@users.noreply.github.com> Date: Sat, 13 Sep 2025 14:38:48 +0800 Subject: [PATCH] fix mooncake connector adxl hostname usage (#2824) ### What this PR does / why we need it? This PR is used to adapt the hostname format for Mooncake when using adxl. When Mooncake uses adxl, it is necessary to set ```USE_ASCEND_DIRECT``` to True in the file ```/Mooncake/mooncake-common/common.cmake``` during compilation. The mooncake_connector obtains this config by calling ```vllm_config.kv_transfer_config.get_from_extra_config```, determines whether Mooncake is using adxl, and selects the corresponding hostname format. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By CI. - vLLM version: main - vLLM main: https://github.com/vllm-project/vllm/commit/d21a36f5f9569949dd6c313deed609d77e393850 --------- Signed-off-by: nwpu-zxr --- .../kv_connector/test_mooncake_connector.py | 40 +++++++++++++++++++ vllm_ascend/distributed/mooncake_connector.py | 10 +++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/tests/ut/kv_connector/test_mooncake_connector.py b/tests/ut/kv_connector/test_mooncake_connector.py index 0b2782d..a3a593e 100644 --- a/tests/ut/kv_connector/test_mooncake_connector.py +++ b/tests/ut/kv_connector/test_mooncake_connector.py @@ -961,6 +961,46 @@ class TestMooncakeConnectorWorker(unittest.TestCase): for p in self.patches: p.stop() # type: ignore + def test_worker_use_ascend_direct(self): + test_case = [True, False] + + for use_ascend_direct in test_case: + with self.subTest(use_ascend_direct=use_ascend_direct): + config = MagicMock() + config.kv_transfer_config = MagicMock() + config.kv_transfer_config.get_from_extra_config.side_effect = ( + lambda k, d: { + "prefill": { + "tp_size": 2, + "dp_size": 1 + }, + "decode": { + "tp_size": 2, + "dp_size": 1 + }, + "use_ascend_direct": use_ascend_direct, + }.get(k, d)) + + config.parallel_config = MagicMock() + config.parallel_config.tensor_parallel_size = 2 + config.parallel_config.data_parallel_rank_local = 0 + config.parallel_config.data_parallel_size_local = 1 + config.kv_transfer_config.kv_port = 8000 + config.kv_transfer_config.kv_role = 'worker' + + with patch( + "vllm_ascend.distributed.mooncake_connector.get_tensor_model_parallel_rank", + return_value=0): + with patch( + "vllm_ascend.distributed.mooncake_connector.get_tp_group", + return_value=None): + with patch( + "vllm_ascend.distributed.mooncake_connector.get_ip", + return_value="127.0.0.1"): + worker = MooncakeConnectorWorker( + config, self.engine_id) + self.assertIsNotNone(worker) + def test_register_kv_caches_producer(self): worker = MooncakeConnectorWorker(self.vllm_config, self.engine_id) worker.register_kv_caches(self.kv_caches) diff --git a/vllm_ascend/distributed/mooncake_connector.py b/vllm_ascend/distributed/mooncake_connector.py index 4faf37d..75753f0 100644 --- a/vllm_ascend/distributed/mooncake_connector.py +++ b/vllm_ascend/distributed/mooncake_connector.py @@ -782,10 +782,12 @@ class MooncakeConnectorWorker: assert len(device_ids) > self.tp_rank # type: ignore self.device_id = device_ids[self.tp_rank] # type: ignore - self._initialize( - hostname=self.side_channel_host + ':' + '0' + ':' + 'npu_' \ - + str(self.device_id), - device_name=None) + if vllm_config.kv_transfer_config.get_from_extra_config( + 'use_ascend_direct', False): + hostname = self.side_channel_host + else: + hostname = f"{self.side_channel_host}:0:npu_{self.device_id}" + self._initialize(hostname=hostname, device_name=None) self.te_rpc_port = self.engine.get_rpc_port() # Background thread for sending or receiving KV caches.