fix qwen3next full graph break. (#3812)
### What this PR does / why we need it? Fix the qwen3next full graph break. Linear attention does not have an `aclgraph_support` attribute, so fall back to `cudagraph_support` to support vLLM. <img width="603" height="120" alt="image" src="https://github.com/user-attachments/assets/d2de53bb-4147-495a-9129-51d9083749be" /> ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.1 Signed-off-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com> Co-authored-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com>
This commit is contained in:
@@ -3775,7 +3775,12 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
|
||||
for attn_group in self._attn_group_iterator():
|
||||
builder = attn_group.get_metadata_builder()
|
||||
if builder.aclgraph_support.value < min_ag_support.value:
|
||||
graph_support = None
|
||||
if hasattr(builder, 'aclgraph_support'):
|
||||
graph_support = builder.aclgraph_support.value
|
||||
else:
|
||||
graph_support = builder.cudagraph_support.value
|
||||
if graph_support < min_ag_support.value:
|
||||
min_ag_support = builder.aclgraph_support
|
||||
min_ag_builder_name = builder.__class__.__name__
|
||||
|
||||
|
||||
Reference in New Issue
Block a user