[feature] add_rms_norm support bias (#5790)

### What this PR does / why we need it?
This PR replaces the separate `addRmsNorm` + `Add` sequence with a single fused `addRmsNormBias` operation, which is more efficient.
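To illustrate the fusion, here is a minimal eager-mode sketch of the semantics the fused kernel is expected to implement (the helper name `add_rms_norm_bias_ref` and the placement of the bias after the weight scaling are assumptions for illustration, not part of this PR):

```python
import torch

def add_rms_norm_bias_ref(x, residual, weight, bias, eps):
    # Sketch of the fused semantics: residual add + RMSNorm + optional bias
    # in one step, instead of npu_add_rms_norm followed by a separate Add.
    residual = x + residual
    var = residual.float().pow(2).mean(dim=-1, keepdim=True)
    y = residual.float() * torch.rsqrt(var + eps)
    y = y.to(residual.dtype) * weight
    if bias is not None:
        y = y + bias  # assumption: bias applied after the weighted normalization
    return y, residual
```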

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Full test suite passes.

- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef

Signed-off-by: Chen_HaoWen <chenhaowen12@huawei.com>
Co-authored-by: Chen_HaoWen <chenhaowen12@huawei.com>
Author: yjmyl
Date: 2026-01-23 21:09:54 +08:00
Committed by: GitHub
Parent: 6c73b88dd6
Commit: e90b14140b
24 changed files with 3537 additions and 13 deletions


@@ -23,6 +23,9 @@ from vllm.config import get_current_vllm_config
from vllm.model_executor.layers.layernorm import GemmaRMSNorm, RMSNorm, RMSNormGated
from vllm_ascend.ops.triton.layernorm_gated import layer_norm_fwd_npu
from vllm_ascend.utils import enable_custom_op
class AscendRMSNorm(RMSNorm):
    def __init__(
@@ -57,6 +60,9 @@ class AscendRMSNorm(RMSNorm):
            residual = x.to(orig_dtype)
            x, _ = torch_npu.npu_rms_norm(x, self.weight,
                                          self.variance_epsilon)
        elif enable_custom_op():
            x, _, residual = torch.ops._C_ascend.npu_add_rms_norm_bias(
                x, residual, self.weight, self.bias, self.variance_epsilon)
        else:
            x, _, residual = torch_npu.npu_add_rms_norm(
                x, residual, self.weight, self.variance_epsilon)
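When `enable_custom_op()` is true, the forward pass now takes the fused path above. A hypothetical parity check against the previous two-step computation might look like this (helper name and tolerances are assumptions, not part of this PR):

```python
import torch
import torch_npu

def check_fused_matches_two_step(x, residual, weight, bias, eps):
    # New fused path introduced by this PR.
    y_fused, _, res_fused = torch.ops._C_ascend.npu_add_rms_norm_bias(
        x, residual, weight, bias, eps)
    # Previous path: fused add + RMSNorm kernel, then a separate bias Add
    # (assumption: bias is applied after the weighted normalization).
    y_ref, _, res_ref = torch_npu.npu_add_rms_norm(x, residual, weight, eps)
    y_ref = y_ref + bias
    torch.testing.assert_close(y_fused, y_ref, rtol=2e-3, atol=2e-3)
    torch.testing.assert_close(res_fused, res_ref, rtol=2e-3, atol=2e-3)
```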
@@ -88,6 +94,10 @@ class AscendGemmaRMSNorm(GemmaRMSNorm):
            residual = x.to(orig_dtype)
            x, _ = torch_npu.npu_rms_norm(x, 1.0 + self.weight,
                                          self.variance_epsilon)
        elif enable_custom_op():
            x, _, residual = torch.ops._C_ascend.npu_add_rms_norm_bias(
                x, residual, 1.0 + self.weight, None,
                self.variance_epsilon)
        else:
            x, _, residual = torch_npu.npu_add_rms_norm(
                x, residual, 1.0 + self.weight, self.variance_epsilon)
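The Gemma variant follows the same pattern: `GemmaRMSNorm` keeps its scale zero-centered, so the fused op receives `1.0 + self.weight` as the multiplier and `None` for the bias. A reference sketch of that convention (the function name is hypothetical):

```python
import torch

def gemma_add_rms_norm_ref(x, residual, weight, eps):
    # Sketch: zero-centered Gemma scale, effective multiplier is (1 + weight),
    # no bias term, so the fused call passes bias=None.
    residual = x + residual
    var = residual.float().pow(2).mean(dim=-1, keepdim=True)
    y = residual.float() * torch.rsqrt(var + eps)
    return (y * (1.0 + weight.float())).to(residual.dtype), residual
```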