[Misc] Upgrade vllm hash to 12_14 (#5000)

### What this PR does / why we need it? ### Does this PR introduce _any_ user-facing change? 1. fix https://github.com/vllm-project/vllm/pull/27938 2. fix https://github.com/vllm-project/vllm/pull/27145 pooling models now supports chunked prefill and prefix caching, 3. fix https://github.com/vllm-project/vllm/pull/30181 define the CPU fields in the field config where they really belong. 4. fix https://github.com/vllm-project/vllm/pull/28168 define the CPU fields in the field config where they really belong. 5. fix https://github.com/vllm-project/vllm/pull/30201 some moudle rename 6. fix https://github.com/vllm-project/vllm/pull/29067 fusedmoe moudle refactor 7. fix https://github.com/vllm-project/vllm/pull/29066 fusedmoe moudle refactor 8. fix https://github.com/vllm-project/vllm/pull/29624 ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: ad32e3e19c --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-12-15 19:54:23 +08:00
parent 3b7eb5179f
commit 8d2998d0e4
17 changed files with 167 additions and 1183 deletions
--- a/vllm_ascend/patch/platform/patch_ec_connector012.py
+++ b/vllm_ascend/patch/platform/patch_ec_connector012.py
@@ -0,0 +1,33 @@
+import vllm.distributed.ec_transfer.ec_connector.shared_storage_connector  # type: ignore[import-not-found]  # noqa
+from safetensors.torch import load_file
+from vllm.distributed.ec_transfer.ec_connector.base import \
+    ECConnectorMetadata  # type: ignore[import-not-found]  # noqa
+from vllm.distributed.ec_transfer.ec_connector.shared_storage_connector import (  # type: ignore[import-not-found]  # noqa
+    ECSharedStorageConnector, ECSharedStorageConnectorMetadata)
+from vllm.logger import logger
+
+
+class AscendECSharedStorageConnector(ECSharedStorageConnector):
+
+    def start_load_caches(self, encoder_cache, **kwargs) -> None:
+        metadata: ECConnectorMetadata = self._get_connector_metadata()
+        assert isinstance(metadata, ECSharedStorageConnectorMetadata)
+        assert encoder_cache is not None
+        if metadata is None:
+            logger.warning((
+                "In connector.start_load_caches, ",
+                "but the connector metadata is None",
+            ))
+            return
+        # Load the EC for each mm data
+        for mm_data in metadata.mm_datas:
+            if mm_data.mm_hash in encoder_cache:
+                continue
+            filename = self._generate_filename_debug(mm_data.mm_hash)
+            ec_cache = load_file(filename)["ec_cache"].npu()
+            encoder_cache[mm_data.mm_hash] = ec_cache
+            logger.debug("Success load encoder cache for hash %s",
+                         mm_data.mm_hash)
+
+
+vllm.distributed.ec_transfer.ec_connector.shared_storage_connector.ECSharedStorageConnector = AscendECSharedStorageConnector