From 2d3b8a51f98622ed0248150f3e60e40448a72247 Mon Sep 17 00:00:00 2001
From: Canlin Guo
Date: Mon, 26 Jan 2026 17:10:03 +0800
Subject: [PATCH] [Patch] Remove the patch of ECExampleConnector (#5976)

### What this PR does / why we need it?

Part of #5304.

https://github.com/vllm-project/vllm/pull/30225 has now been merged, so this
patch is no longer needed.

- vLLM version: v0.13.0
- vLLM main: https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060

Signed-off-by: gcanlin
---
 vllm_ascend/patch/__init__.py                 | 12 --------
 vllm_ascend/patch/platform/__init__.py        |  1 -
 .../patch/platform/patch_ec_connector.py      | 30 -------------------
 3 files changed, 43 deletions(-)
 delete mode 100644 vllm_ascend/patch/platform/patch_ec_connector.py

diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py
index 4a79ce39..94f68006 100644
--- a/vllm_ascend/patch/__init__.py
+++ b/vllm_ascend/patch/__init__.py
@@ -42,18 +42,6 @@
 # Future Plan:
 #    Find a better way to support tensor alignment for 310p without this patch.
 #
-# ** 2. File: platform/patch_ec_connector.py**
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# 1. `vllm.distributed.ec_transfer.ec_connector.shared_storage_connector.ECSharedStorageConnector.start_load_caches`
-#    Why:
-#       it's hard code to cuda
-#    How:
-#       change the cuda to npu
-#    Related PR (if no, explain why):
-#       https://github.com/vllm-project/vllm/pull/30225
-#    Future Plan:
-#       Remove this patch when vllm merges the PR.
-#
 # ** 3. File: platform/patch_mamba_config.py**
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # 1. `vllm.model_executor.models.config.HybridAttentionMambaModelConfig.verify_and_update_config`
diff --git a/vllm_ascend/patch/platform/__init__.py b/vllm_ascend/patch/platform/__init__.py
index 5fa56ce8..14a06d6b 100644
--- a/vllm_ascend/patch/platform/__init__.py
+++ b/vllm_ascend/patch/platform/__init__.py
@@ -17,7 +17,6 @@
 import os
 
 import vllm_ascend.patch.platform.patch_distributed  # noqa
-import vllm_ascend.patch.platform.patch_ec_connector  # noqa
 import vllm_ascend.patch.platform.patch_mamba_config  # noqa
 import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 from vllm_ascend import envs
diff --git a/vllm_ascend/patch/platform/patch_ec_connector.py b/vllm_ascend/patch/platform/patch_ec_connector.py
deleted file mode 100644
index f7666b74..00000000
--- a/vllm_ascend/patch/platform/patch_ec_connector.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import vllm.distributed.ec_transfer.ec_connector.example_connector
-from safetensors.torch import load_file
-from vllm.distributed.ec_transfer.ec_connector.example_connector import ECConnectorMetadata, ECExampleConnector
-from vllm.logger import logger
-
-
-class AscendECExampleConnector(ECExampleConnector):
-    def start_load_caches(self, encoder_cache, **kwargs) -> None:
-        metadata: ECConnectorMetadata = self._get_connector_metadata()
-        assert isinstance(metadata, ECConnectorMetadata)
-        assert encoder_cache is not None
-        if metadata is None:
-            logger.warning(
-                (
-                    "In connector.start_load_caches, ",
-                    "but the connector metadata is None",
-                )
-            )
-            return
-        # Load the EC for each mm data
-        for mm_data in metadata.mm_datas:
-            if mm_data.mm_hash in encoder_cache:
-                continue
-            filename = self._generate_filename_debug(mm_data.mm_hash)
-            ec_cache = load_file(filename)["ec_cache"].npu()
-            encoder_cache[mm_data.mm_hash] = ec_cache
-            logger.debug("Success load encoder cache for hash %s", mm_data.mm_hash)
-
-
-vllm.distributed.ec_transfer.ec_connector.example_connector.ECExampleConnector = AscendECExampleConnector
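
For context, the deleted patch existed only because the upstream connector hard-coded `.cuda()` when restoring encoder caches, and vllm-project/vllm#30225 removes that need, presumably by making device placement backend-agnostic. Below is a minimal sketch of that pattern, not vLLM's actual implementation: `load_encoder_cache` and `target_device` are hypothetical names, while `load_file` and the `"ec_cache"` key follow the deleted patch above.

```python
# Illustrative sketch only (not vLLM's code): restore a cached encoder tensor
# and move it to whatever device the worker runs on, instead of hard-coding
# .cuda() or .npu().
import torch
from safetensors.torch import load_file


def load_encoder_cache(filename: str, target_device: torch.device) -> torch.Tensor:
    tensors = load_file(filename)  # safetensors deserializes on CPU by default
    return tensors["ec_cache"].to(target_device)  # move to the worker's device
```

Because placement is expressed through `.to(device)`, the same code path serves CUDA and NPU workers (with `torch_npu` registering the `npu` device), which is why the Ascend-specific monkeypatch can be dropped once the upstream change is in place.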