Reapply "[Refactor] Unify full-graph parameter update logic (#6041)" (#6227) (#6231)

This reverts commit 95649344aa.

The CI failure isn't related to this change. Let's reapply it.

- vLLM version: v0.14.0
- vLLM main: d68209402d
wangxiyuan
2026-01-26 09:04:54 +08:00
committed by GitHub
parent c38c838d03
commit 4e3919e965
10 changed files with 420 additions and 415 deletions


@@ -416,7 +416,7 @@ def get_kv_cache_spec(vllm_config: VllmConfig) -> dict[str, KVCacheSpec]:
     kv_cache_spec: dict[str, KVCacheSpec] = {}
     attn_layers = get_layers_from_vllm_config(vllm_config, AttentionLayerBase)
     # NOTE: Must process Attention/MLAAttention before MambaBase to maintain
-    # ordering expected by acl_graph.py's _update_attn_fia_params.
+    # ordering expected by graph parameter update logic in attention backends.
     mamba_layers: dict[str, MambaBase] = {}
     for layer_name, attn_module in attn_layers.items():
         if isinstance(attn_module, Attention):
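
For context, the NOTE above encodes an ordering contract: attention layers must be registered in kv_cache_spec before any Mamba layers, per the ordering the graph parameter update logic expects. Below is a minimal, self-contained sketch of that two-pass pattern; the class names and spec values are simplified stand-ins, not vLLM's actual types or API.

    # Sketch of the deferred-processing pattern from the diff above:
    # attention layers populate the spec immediately, Mamba layers are
    # collected and appended in a second pass. Python dicts preserve
    # insertion order, which is what makes the ordering contract hold.

    class AttentionLayer:  # stand-in for Attention / MLAAttention
        pass

    class MambaLayer:  # stand-in for MambaBase
        pass

    def build_kv_cache_spec(layers: dict[str, object]) -> dict[str, str]:
        kv_cache_spec: dict[str, str] = {}
        mamba_layers: dict[str, MambaLayer] = {}

        # Pass 1: register attention layers now; defer Mamba layers.
        for name, module in layers.items():
            if isinstance(module, MambaLayer):
                mamba_layers[name] = module
            else:
                kv_cache_spec[name] = "attention-spec"

        # Pass 2: append Mamba layers only after every attention layer.
        for name, module in mamba_layers.items():
            kv_cache_spec[name] = "mamba-spec"

        return kv_cache_spec

    if __name__ == "__main__":
        layers = {
            "mamba.0": MambaLayer(),
            "attn.0": AttentionLayer(),
            "attn.1": AttentionLayer(),
        }
        # Attention entries come first regardless of input order:
        print(list(build_kv_cache_spec(layers)))  # ['attn.0', 'attn.1', 'mamba.0']

Deferring the Mamba layers into a side dict, rather than sorting the spec afterwards, keeps a single pass over the input and relies only on Python's guaranteed dict insertion order.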