From 8bc72a807aa1bf833f143169cf2f6be069d2e4ac Mon Sep 17 00:00:00 2001
From: pz1116 <47019764+Pz1116@users.noreply.github.com>
Date: Thu, 16 Apr 2026 10:40:14 +0800
Subject: [PATCH] =?UTF-8?q?[BugFix][v0.18.0]=20require=20piecewise=20cudag?=
 =?UTF-8?q?raph=20for=20layerwise=20AscendSto=E2=80=A6=20(#8282)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What this PR does / why we need it?
ref:https://github.com/vllm-project/vllm-ascend/issues/8184

following https://github.com/vllm-project/vllm/pull/31057, add `requires_piecewise_for_cudagraph` for `AscendStoreConnector`

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

Signed-off-by: Pz1116
---
 .../kv_pool/ascend_store/ascend_store_connector.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/ascend_store_connector.py b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/ascend_store_connector.py
index 717ff70a..48048805 100644
--- a/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/ascend_store_connector.py
+++ b/vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/ascend_store_connector.py
@@ -64,6 +64,16 @@ class AscendStoreKVEvents(KVConnectorKVEvents):
 
 
 class AscendStoreConnector(KVConnectorBase_V1):
+    @classmethod
+    def requires_piecewise_for_cudagraph(cls, extra_config: dict[str, Any]) -> bool:
+        """
+        AscendStore requires PIECEWISE CUDA graph mode when layerwise
+        operations are enabled. The layerwise load/save hooks perform
+        async synchronization that cannot be safely captured in CUDA
+        graphs.
+        """
+        return extra_config.get("use_layerwise", False)
+
     def __init__(self, vllm_config: VllmConfig, role: KVConnectorRole, kv_cache_config: KVCacheConfig | None = None):
         super().__init__(vllm_config=vllm_config, role=role, kv_cache_config=kv_cache_config)
         self.kv_role = vllm_config.kv_transfer_config.kv_role