from typing import TYPE_CHECKING

from vllm.distributed.kv_transfer.kv_connector.v1.multi_connector import MultiConnector

from vllm_ascend.distributed.kv_transfer.kv_p2p.mooncake_layerwise_connector import MooncakeLayerwiseConnector

if TYPE_CHECKING:
    from vllm.v1.core.kv_cache_manager import KVCacheBlocks
    from vllm.v1.request import Request


class AscendMultiConnector(MultiConnector):
    """MultiConnector whose post-allocation dispatch also targets layerwise connectors.

    Every ``MooncakeLayerwiseConnector`` held by this instance is handed the
    real allocation, regardless of whether it is the connector recorded for
    the request.
    """

    def update_state_after_alloc(
        self,
        request: "Request",
        blocks: "KVCacheBlocks",
        num_external_tokens: int,
    ):
        """Notify every wrapped connector that blocks were allocated for ``request``.

        Connectors are visited in ``self._connectors`` order and each one
        receives exactly one ``update_state_after_alloc`` call.

        Args:
            request: The request the allocation belongs to; its ``request_id``
                is looked up in ``self._requests_to_connector``.
            blocks: The allocated blocks. Passed through unchanged to the
                selected connector and to every ``MooncakeLayerwiseConnector``.
            num_external_tokens: Token count passed along with ``blocks``.
        """
        # -1 never equals an enumerate() index, so an unknown request id
        # selects no connector by position.
        selected_index = self._requests_to_connector.get(request.request_id, -1)
        placeholder_blocks = blocks.new_empty()

        for index, connector in enumerate(self._connectors):
            receives_allocation = index == selected_index or isinstance(
                connector, MooncakeLayerwiseConnector
            )
            # Selected and layerwise connectors get the real allocation;
            # all remaining connectors get an empty one with zero tokens.
            forwarded_blocks, forwarded_tokens = (
                (blocks, num_external_tokens)
                if receives_allocation
                else (placeholder_blocks, 0)
            )
            connector.update_state_after_alloc(request, forwarded_blocks, forwarded_tokens)