From b8a317caac6bec831a8bd2ede482378149787f6a Mon Sep 17 00:00:00 2001 From: Slightwind Date: Fri, 12 Dec 2025 14:37:26 +0800 Subject: [PATCH] [main][Bugfix] Remove the ZMQ communication setup on the D node (#4926) In the prefill-decode (PD) separation scenario, the decode (D) node (kv_role == "kv_consumer") does not need to perform get operations, so it does not need to set up ZeroMQ (ZMQ) communication; the LookupKeyServer and LookupKeyClient are therefore no longer created on it. - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: SlightwindSec --- vllm_ascend/distributed/kvpool/ascend_store_connector.py | 2 +- vllm_ascend/distributed/kvpool/pool_scheduler.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vllm_ascend/distributed/kvpool/ascend_store_connector.py b/vllm_ascend/distributed/kvpool/ascend_store_connector.py index 093f3c07..1f11f841 100644 --- a/vllm_ascend/distributed/kvpool/ascend_store_connector.py +++ b/vllm_ascend/distributed/kvpool/ascend_store_connector.py @@ -55,7 +55,7 @@ class AscendStoreConnector(KVConnectorBase_V1): ) assert self.connector_worker is not None - if vllm_config.parallel_config.rank == 0: + if vllm_config.parallel_config.rank == 0 and self.kv_role != "kv_consumer": self.lookup_server = LookupKeyServer(self.connector_worker, vllm_config, self.use_layerwise) diff --git a/vllm_ascend/distributed/kvpool/pool_scheduler.py b/vllm_ascend/distributed/kvpool/pool_scheduler.py index 4aa1a5d7..8d239a16 100644 --- a/vllm_ascend/distributed/kvpool/pool_scheduler.py +++ b/vllm_ascend/distributed/kvpool/pool_scheduler.py @@ -20,13 +20,14 @@ from vllm_ascend.distributed.kvpool.config_data import ( class KVPoolScheduler: def __init__(self, vllm_config: "VllmConfig", use_layerwise): - self.client = LookupKeyClient(vllm_config) self.use_layerwise = use_layerwise self.kv_role = vllm_config.kv_transfer_config.kv_role self.consumer_is_to_load = vllm_config.kv_transfer_config.kv_connector_extra_config.get( "consumer_is_to_load", False) self.load_async = 
vllm_config.kv_transfer_config.kv_connector_extra_config.get( "load_async", False) + self.client = LookupKeyClient( + vllm_config) if self.kv_role != "kv_consumer" else None # request_id -> (vllm cached tokes, kvpool cached tokens) self.load_specs: dict[str, LoadSpec] = {} self.pcp_size = getattr(vllm_config.parallel_config, @@ -74,8 +75,8 @@ class KVPoolScheduler: else: token_len = len(request.prompt_token_ids) - num_external_hit_tokens = self.client.lookup(token_len, - request.block_hashes) + num_external_hit_tokens = self.client.lookup( # type: ignore[union-attr] + token_len, request.block_hashes) if num_external_hit_tokens == request.num_tokens: num_external_hit_tokens -= 1