[BugFix]Fix incorrect get_current_vllm_config (#5121)
### What this PR does / why we need it?
This PR fixes some incorrect `get_current_vllm_config` calls, which
create an empty vllm_config instead of returning the active one.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: Angazenn <supperccell@163.com>
This commit is contained in:
@@ -367,6 +367,7 @@ class AscendAttentionBackendImpl(AttentionImpl):
|
||||
kv_sharing_target_layer_name: Optional[str],
|
||||
**kwargs,
|
||||
) -> None:
|
||||
self.vllm_config = get_current_vllm_config()
|
||||
self.num_heads = num_heads
|
||||
self.head_size = head_size
|
||||
self.scale = float(scale)
|
||||
@@ -723,7 +724,7 @@ class AscendAttentionBackendImpl(AttentionImpl):
|
||||
):
|
||||
num_tokens = query.shape[0]
|
||||
if (attn_metadata.attn_state == AscendAttentionState.DecodeOnly
|
||||
and using_paged_attention(num_tokens)
|
||||
and using_paged_attention(num_tokens, self.vllm_config)
|
||||
and self.sliding_window is None):
|
||||
output = self.forward_paged_attention(query, attn_metadata, output)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user