[feature] add_rms_norm support bias (#5790)
### What this PR does / why we need it?
This PR replaces the separate `addRmsNorm` and `Add` operations with a single fused `addRmsNormBias` operation, which leads to more efficient execution.
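For illustration, here is a minimal numpy sketch of the computation involved. It assumes the fused op folds the extra bias add into the residual-add-plus-RMSNorm step; the function names below are hypothetical stand-ins, not the actual NPU kernel API.

```python
import numpy as np

def rms_norm(x, weight, eps=1e-6):
    # Standard RMSNorm: scale by reciprocal RMS, then elementwise weight.
    rms = np.sqrt(np.mean(x * x, axis=-1, keepdims=True) + eps)
    return x / rms * weight

def add_then_rms_norm(x, residual, bias, weight, eps=1e-6):
    # Unfused path: a separate Add followed by addRmsNorm
    # (two kernel launches on the device).
    h = x + residual + bias
    return rms_norm(h, weight, eps), h

def add_rms_norm_bias(x, residual, bias, weight, eps=1e-6):
    # Fused path this PR targets: one addRmsNormBias-style op
    # producing the same result in a single kernel (plain numpy here).
    h = x + residual + bias
    rms = np.sqrt(np.mean(h * h, axis=-1, keepdims=True) + eps)
    return h / rms * weight, h
```

Both paths return identical outputs; the saving comes from avoiding the extra elementwise kernel and intermediate tensor traffic.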
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Full test suite passes.
- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef
Signed-off-by: Chen_HaoWen <chenhaowen12@huawei.com>
Co-authored-by: Chen_HaoWen <chenhaowen12@huawei.com>
@@ -28,8 +28,8 @@ def test_qwen3_w8a8_quant():
     ]
     vllm_target_outputs = [([
         85, 4086, 44, 374, 264, 1550, 42747, 628, 323, 4938, 72816, 44378, 323,
-        13480, 4712, 369, 444, 10994, 82, 13, 1084, 374, 6188, 311, 387
-    ], 'vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs. It is designed to be'
+        13480, 4712, 369, 444, 10994, 82, 13, 1084, 374, 6188, 369, 3460
+    ], 'vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs. It is designed for large'
     )]

     with VllmRunner(