[BugFix]Fix incorrect get_current_vllm_config (#5121)

### What this PR does / why we need it?
This PR fixes some incorrect `get_current_vllm_config` calling, which
creates empty vllm_config instead.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

---------

Signed-off-by: Angazenn <supperccell@163.com>
This commit is contained in:
Angazenn
2025-12-18 22:21:36 +08:00
committed by GitHub
parent fd9a47c04d
commit 632eab28b7
6 changed files with 12 additions and 15 deletions

View File

@@ -296,8 +296,9 @@ def _update_attn_fia_params(update_stream, forward_context, runtime_shape):
event.record(update_stream)
def update_attn_params(update_stream, forward_context, runtime_shape):
if using_paged_attention(runtime_shape):
def update_attn_params(update_stream, forward_context, runtime_shape,
vllm_config):
if using_paged_attention(runtime_shape, vllm_config):
_update_attn_pa_params(update_stream, forward_context, runtime_shape)
else:
_update_attn_fia_params(update_stream, forward_context, runtime_shape)

View File

@@ -23,8 +23,7 @@ from torch._inductor.pattern_matcher import (PatternMatcherPass,
PatternPrettyPrinter)
from vllm.attention.layer import Attention
from vllm.compilation.vllm_inductor_pass import VllmInductorPass
from vllm.config import (VllmConfig, get_current_vllm_config,
get_layers_from_vllm_config)
from vllm.config import VllmConfig, get_layers_from_vllm_config
class QKNormRopeFusionPattern:
@@ -42,7 +41,6 @@ class QKNormRopeFusionPattern:
self.q_size = self.num_heads * self.head_dim
self.kv_size = self.num_kv_heads * self.head_dim
self.eps = eps
vllm_config = get_current_vllm_config()
self.device = vllm_config.device_config.device if vllm_config.device_config else None
def get_inputs(self):