[BugFix][v0.18.0] require piecewise cudagraph for layerwise AscendStoreConnector (#8282)
<!-- Thanks for sending a pull request! BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html --> ### What this PR does / why we need it? ref:https://github.com/vllm-project/vllm-ascend/issues/8184 following https://github.com/vllm-project/vllm/pull/31057, add `requires_piecewise_for_cudagraph` for `AscendStoreConnector` ### Does this PR introduce _any_ user-facing change? <!-- Note that it means *any* user-facing change including all aspects such as API, interface or other behavior changes. Documentation-only updates are not considered user-facing changes. --> ### How was this patch tested? <!-- CI passed with new added/existing test. If it was tested in a way different from regular unit tests, please clarify how you tested step by step, ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future. If tests were not added, please describe why they were not added and/or why it was difficult to add. --> Signed-off-by: Pz1116 <zpbzpb123123@gmail.com>
This commit is contained in:
@@ -64,6 +64,16 @@ class AscendStoreKVEvents(KVConnectorKVEvents):
|
||||
|
||||
|
||||
class AscendStoreConnector(KVConnectorBase_V1):
|
||||
@classmethod
def requires_piecewise_for_cudagraph(cls, extra_config: dict[str, Any]) -> bool:
    """Report whether this connector needs PIECEWISE CUDA graph mode.

    Layerwise load/save hooks issue asynchronous synchronization that is
    not safe to capture inside a full CUDA graph, so piecewise capture is
    required whenever layerwise mode is turned on in *extra_config*.
    """
    # "use_layerwise" absent means layerwise mode is off, so no
    # piecewise requirement.
    use_layerwise = extra_config.get("use_layerwise", False)
    return use_layerwise
|
||||
|
||||
def __init__(self, vllm_config: VllmConfig, role: KVConnectorRole, kv_cache_config: KVCacheConfig | None = None):
    """Construct the connector and record its KV-transfer role.

    Delegates common setup to the base connector, then caches
    ``kv_role`` from the KV-transfer configuration for quick access.
    """
    super().__init__(vllm_config=vllm_config, role=role, kv_cache_config=kv_cache_config)
    transfer_config = vllm_config.kv_transfer_config
    self.kv_role = transfer_config.kv_role
|
||||
|
||||
Reference in New Issue
Block a user