fix qwen3next full graph break. (#3812)
### What this PR does / why we need it? Fix the qwen3next full graph break. linearattention does not have an `aclgraph_support` attribute, so fall back to `cudagraph_support` to stay compatible with vLLM. <img width="603" height="120" alt="image" src="https://github.com/user-attachments/assets/d2de53bb-4147-495a-9129-51d9083749be" /> ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/releases/v0.11.1 Signed-off-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com> Co-authored-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com>
This commit is contained in:
@@ -3775,7 +3775,12 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
|
|
||||||
for attn_group in self._attn_group_iterator():
|
for attn_group in self._attn_group_iterator():
|
||||||
builder = attn_group.get_metadata_builder()
|
builder = attn_group.get_metadata_builder()
|
||||||
if builder.aclgraph_support.value < min_ag_support.value:
|
graph_support = None
|
||||||
|
if hasattr(builder, 'aclgraph_support'):
|
||||||
|
graph_support = builder.aclgraph_support.value
|
||||||
|
else:
|
||||||
|
graph_support = builder.cudagraph_support.value
|
||||||
|
if graph_support < min_ag_support.value:
|
||||||
min_ag_support = builder.aclgraph_support
|
min_ag_support = builder.aclgraph_support
|
||||||
min_ag_builder_name = builder.__class__.__name__
|
min_ag_builder_name = builder.__class__.__name__
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user