[Bugfix] Fix Qwen2.5-Omni-7B accuracy test (#4556)

### What this PR does / why we need it?
Fix Qwen2.5-Omni-7B accuracy test
Issue: https://github.com/vllm-project/vllm-ascend/issues/4480
Depends on: https://github.com/vllm-project/vllm-ascend/pull/4534

- vLLM version: v0.11.2
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
This commit is contained in:
zhangxinyuehfad
2025-12-02 09:20:05 +08:00
committed by GitHub
parent b4bf01ead1
commit 71e9b379c8
2 changed files with 9 additions and 5 deletions

View File

@@ -108,13 +108,13 @@ class AscendRMSNorm(RMSNorm):
residual: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
import torch_npu
if residual is not None:
residual = torch.ops.vllm.maybe_chunk_residual(x, residual)
assert x.size(0) == residual.size(0)
next_need_quant_fusion_linear = getattr(
self, 'next_need_quant_fusion_linear', None)
x, residual = _addrmsnorm_forward_oot(
self, x, residual, self.next_need_quant_fusion_linear,
self.bias)
self, x, residual, next_need_quant_fusion_linear, self.bias)
return x, residual
x, residual = torch_npu.npu_rms_norm(x, self.weight,
self.variance_epsilon)