Reapply "[Refactor] Unify full-graph parameter update logic (#6041)" (#6227) (#6231)

This reverts commit 95649344aa.

The CI failure isn't related to this change. Let's reapply it.

- vLLM version: v0.14.0
- vLLM main: d68209402d
wangxiyuan
2026-01-26 09:04:54 +08:00
committed by GitHub
parent c38c838d03
commit 4e3919e965
10 changed files with 420 additions and 415 deletions


@@ -416,7 +416,7 @@ def get_kv_cache_spec(vllm_config: VllmConfig) -> dict[str, KVCacheSpec]:
     kv_cache_spec: dict[str, KVCacheSpec] = {}
     attn_layers = get_layers_from_vllm_config(vllm_config, AttentionLayerBase)
     # NOTE: Must process Attention/MLAAttention before MambaBase to maintain
-    # ordering expected by acl_graph.py's _update_attn_fia_params.
+    # ordering expected by graph parameter update logic in attention backends.
     mamba_layers: dict[str, MambaBase] = {}
     for layer_name, attn_module in attn_layers.items():
         if isinstance(attn_module, Attention):
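
For context, the NOTE above encodes an ordering contract: attention layers must be registered in kv_cache_spec before any Mamba layers, per the ordering the graph parameter update logic expects. Below is a minimal, self-contained sketch of that two-pass pattern; the class names and spec values are simplified stand-ins, not vLLM's actual types or API.

    # Sketch of the deferred-processing pattern from the diff above:
    # attention layers populate the spec immediately, Mamba layers are
    # collected and appended in a second pass. Python dicts preserve
    # insertion order, which is what makes the ordering contract hold.

    class AttentionLayer:  # stand-in for Attention / MLAAttention
        pass

    class MambaLayer:  # stand-in for MambaBase
        pass

    def build_kv_cache_spec(layers: dict[str, object]) -> dict[str, str]:
        kv_cache_spec: dict[str, str] = {}
        mamba_layers: dict[str, MambaLayer] = {}

        # Pass 1: register attention layers now; defer Mamba layers.
        for name, module in layers.items():
            if isinstance(module, MambaLayer):
                mamba_layers[name] = module
            else:
                kv_cache_spec[name] = "attention-spec"

        # Pass 2: append Mamba layers only after every attention layer.
        for name, module in mamba_layers.items():
            kv_cache_spec[name] = "mamba-spec"

        return kv_cache_spec

    if __name__ == "__main__":
        layers = {
            "mamba.0": MambaLayer(),
            "attn.0": AttentionLayer(),
            "attn.1": AttentionLayer(),
        }
        # Attention entries come first regardless of input order:
        print(list(build_kv_cache_spec(layers)))  # ['attn.0', 'attn.1', 'mamba.0']

Deferring the Mamba layers into a side dict, rather than sorting the spec afterwards, keeps a single pass over the input and relies only on Python's guaranteed dict insertion order.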