[Graph][Fusion] Integrating inductor pass and npugraph ex pass (#6354)

### What this PR does / why we need it?
Integrating inductor pass and npugraph ex pass, see RFC:
https://github.com/vllm-project/vllm-ascend/issues/6347

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
all tests passed.

- vLLM version: v0.14.1
- vLLM main:
dc917cceb8

---------

Signed-off-by: wxsIcey <1790571317@qq.com>
This commit is contained in:
Icey
2026-02-13 15:34:55 +08:00
committed by GitHub
parent 87a0b7b7c7
commit 7164990904
16 changed files with 220 additions and 909 deletions

View File

@@ -40,6 +40,18 @@ else:
from vllm.compilation.passes.fx_utils import OpOverload
# Module-level singleton: fusion-pass patterns must only be registered once,
# so every test shares one backend instance.
_backend_cache = None


def get_or_create_backend(vllm_config):
    """Return the shared TestBackend, creating it on first use.

    The backend is built lazily and memoized in ``_backend_cache`` so that
    the fusion pass's pattern registration happens exactly once per process;
    subsequent calls ignore ``vllm_config`` and return the cached instance.
    """
    global _backend_cache
    if _backend_cache is None:
        fusion_passes = [AddRMSNormQuantFusionPass(vllm_config=vllm_config)]
        _backend_cache = TestBackend(custom_passes=fusion_passes)
    return _backend_cache
class TestModelWithoutBias(nn.Module):
"""
@@ -317,9 +329,7 @@ def test_rmsnorm_quant_fusion(
with vllm.config.set_current_vllm_config(vllm_config):
with set_ascend_forward_context(None, vllm_config):
backend = TestBackend(custom_passes=[
AddRMSNormQuantFusionPass(vllm_config=vllm_config)
])
backend = get_or_create_backend(vllm_config)
if use_bias:
if not enable_custom_op():
return