[0.11.0][Bugfix] Remove the ZMQ communication setup on the D node (#4916)

In the PD separation scenario, the D node does not need to perform get
operations, and therefore does not need to create ZeroMQ (ZMQ)
communication.
---------

Signed-off-by: SlightwindSec <slightwindsec@gmail.com>
This commit is contained in:
Slightwind
2025-12-12 14:37:49 +08:00
committed by GitHub
parent ceadc2788d
commit 9c0ad46c1a

View File

@@ -45,7 +45,7 @@ class MooncakeConnectorV1(KVConnectorBase_V1):
)
assert self.connector_worker is not None
if vllm_config.parallel_config.rank == 0:
if vllm_config.parallel_config.rank == 0 and self.kv_role != "kv_consumer":
self.lookup_server = MooncakeLookupServer(
self.connector_worker, vllm_config, self.use_layerwise)
@@ -160,9 +160,10 @@ def get_zmq_rpc_path_mooncake(
class MooncakeStoreConnectorV1Scheduler:
def __init__(self, vllm_config: "VllmConfig", use_layerwise):
self.client = MooncakeLookupClient(vllm_config)
self.use_layerwise = use_layerwise
self.kv_role = vllm_config.kv_transfer_config.kv_role
self.client = MooncakeLookupClient(
vllm_config) if self.kv_role != "kv_consumer" else None
self.consumer_is_to_load = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
"consumer_is_to_load", False)
self.load_async = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
@@ -207,7 +208,8 @@ class MooncakeStoreConnectorV1Scheduler:
else:
token_ids = torch.tensor(request.prompt_token_ids)
num_external_hit_tokens = self.client.lookup(token_ids)
num_external_hit_tokens = self.client.lookup( # type: ignore[union-attr]
token_ids)
if num_external_hit_tokens == request.num_tokens:
num_external_hit_tokens -= 1