[BugFix]Fix incorrect get_current_vllm_config (#5121)
### What this PR does / why we need it?
This PR fixes some incorrect `get_current_vllm_config` calls, which
create an empty vllm_config instead of returning the active one.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: Angazenn <supperccell@163.com>
This commit is contained in:
@@ -367,6 +367,7 @@ class AscendAttentionBackendImpl(AttentionImpl):
|
||||
kv_sharing_target_layer_name: Optional[str],
|
||||
**kwargs,
|
||||
) -> None:
|
||||
self.vllm_config = get_current_vllm_config()
|
||||
self.num_heads = num_heads
|
||||
self.head_size = head_size
|
||||
self.scale = float(scale)
|
||||
@@ -723,7 +724,7 @@ class AscendAttentionBackendImpl(AttentionImpl):
|
||||
):
|
||||
num_tokens = query.shape[0]
|
||||
if (attn_metadata.attn_state == AscendAttentionState.DecodeOnly
|
||||
and using_paged_attention(num_tokens)
|
||||
and using_paged_attention(num_tokens, self.vllm_config)
|
||||
and self.sliding_window is None):
|
||||
output = self.forward_paged_attention(query, attn_metadata, output)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user