[Inductor] Change pass to adapt to new addrmsnormBias operator (#6094)

### What this PR does / why we need it?
PR #5790 changes the default addrmsnormBias operator when custom ops are enabled.
This PR modifies the AddRmsNormQuant pass to align with the addrmsnormBias operator.

---------

Signed-off-by: Angazenn <supperccell@163.com>
This commit is contained in:
Angazenn
2026-01-24 20:16:44 +08:00
committed by GitHub
parent 8966a99710
commit 5b746f3e83
2 changed files with 20 additions and 20 deletions

View File

@@ -32,6 +32,7 @@ from tests.e2e.singlecard.compile.backend import TestBackend
from vllm_ascend.ascend_forward_context import set_ascend_forward_context
from vllm_ascend.compilation.passes.norm_quant_fusion_pass import \
AddRMSNormQuantFusionPass
from vllm_ascend.utils import enable_custom_op
class TestModelWithoutBias(nn.Module):
@@ -124,11 +125,8 @@ class TestModelWithBias(nn.Module):
"""
residual = torch.zeros_like(x)
norm_output, _, new_residual = torch_npu.npu_add_rms_norm(
x, residual, self.rms_norm_weight, self.eps)
# Add bias
norm_output_with_bias = norm_output + self.bias
norm_output_with_bias, _, new_residual = torch.ops._C_ascend.npu_add_rms_norm_bias(
x, residual, self.rms_norm_weight, self.bias, self.eps)
quantized_output = torch.ops.vllm.quantize(norm_output_with_bias,
self.quant_scale,
@@ -140,8 +138,7 @@ class TestModelWithBias(nn.Module):
def ops_in_model_before(self) -> List[OpOverload]:
"""Return the list of expected operators BEFORE fusion."""
return [
torch.ops.npu.npu_add_rms_norm.default,
torch.ops.aten.add.Tensor, # Add bias operation
torch.ops._C_ascend.npu_add_rms_norm_bias.default,
torch.ops.vllm.quantize.default
]
@@ -249,11 +246,8 @@ class TestModelSPWithBias(nn.Module):
"""
residual = torch.zeros_like(x)
norm_output, _, new_residual = torch_npu.npu_add_rms_norm(
x, residual, self.rms_norm_weight, self.eps)
# Add bias
norm_output_with_bias = norm_output + self.bias
norm_output_with_bias, _, new_residual = torch.ops._C_ascend.npu_add_rms_norm_bias(
x, residual, self.rms_norm_weight, self.bias, self.eps)
norm_output_with_bias = torch.ops.vllm.maybe_all_gather_and_maybe_unpad(
norm_output_with_bias, True)
@@ -268,8 +262,7 @@ class TestModelSPWithBias(nn.Module):
def ops_in_model_before(self) -> List[OpOverload]:
"""Return the list of expected operators BEFORE fusion."""
return [
torch.ops.npu.npu_add_rms_norm.default,
torch.ops.aten.add.Tensor, # Add bias operation
torch.ops._C_ascend.npu_add_rms_norm_bias.default,
torch.ops.vllm.maybe_all_gather_and_maybe_unpad.default,
torch.ops.vllm.quantize.default
]
@@ -322,6 +315,8 @@ def test_rmsnorm_quant_fusion(
AddRMSNormQuantFusionPass(vllm_config=vllm_config)
])
if use_bias:
if not enable_custom_op():
return
if sp_enable:
model = TestModelSPWithBias(hidden_size,
dtype,