[Graph][Fusion] Integrating inductor pass and npugraph ex pass (#6354)

### What this PR does / why we need it?

Integrates the inductor pass and the npugraph ex pass. See RFC: https://github.com/vllm-project/vllm-ascend/issues/6347

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

All tests passed.

- vLLM version: v0.14.1
- vLLM main: dc917cceb8

---------

Signed-off-by: wxsIcey <1790571317@qq.com>
2026-02-13 15:34:55 +08:00
parent 87a0b7b7c7
commit 7164990904
16 changed files with 220 additions and 909 deletions
--- a/tests/e2e/singlecard/compile/test_norm_quant_fusion.py
+++ b/tests/e2e/singlecard/compile/test_norm_quant_fusion.py
@@ -40,6 +40,18 @@ else:
    from vllm.compilation.passes.fx_utils import OpOverload


+# Cache backend to avoid duplicate pattern registration
+_backend_cache = None
+
+
+def get_or_create_backend(vllm_config):
+    """Return the module-level cached TestBackend, building it with AddRMSNormQuantFusionPass on the first call; later calls return the same instance and ignore vllm_config (cached to avoid duplicate pattern registration)."""
+    global _backend_cache
+    if _backend_cache is None:  # first call only: nothing has been cached yet
+        _backend_cache = TestBackend(custom_passes=[
+            AddRMSNormQuantFusionPass(vllm_config=vllm_config)
+        ])
+    return _backend_cache  # same object on every call once it has been built

 class TestModelWithoutBias(nn.Module):
    """
@@ -317,9 +329,7 @@ def test_rmsnorm_quant_fusion(

    with vllm.config.set_current_vllm_config(vllm_config):
        with set_ascend_forward_context(None, vllm_config):
-            backend = TestBackend(custom_passes=[
-                AddRMSNormQuantFusionPass(vllm_config=vllm_config)
-            ])
+            backend = get_or_create_backend(vllm_config)
            if use_bias:
                if not enable_custom_op():
                    return