Revert "[Bugfix] Fix Qwen2.5-Omni-7B accuarcy test (#4556)" (#4619)

This reverts commit 71e9b379c8 because it breaks the vllm-ascend/Qwen3-30B-A3B-W8A8 test.
This commit is contained in:
wangxiyuan
2025-12-02 13:15:47 +08:00
committed by GitHub
parent e18e3067a7
commit 6360eb1dea
2 changed files with 5 additions and 9 deletions

View File

@@ -108,13 +108,13 @@ class AscendRMSNorm(RMSNorm):
residual: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
import torch_npu
if residual is not None:
residual = torch.ops.vllm.maybe_chunk_residual(x, residual)
assert x.size(0) == residual.size(0)
next_need_quant_fusion_linear = getattr(
self, 'next_need_quant_fusion_linear', None)
x, residual = _addrmsnorm_forward_oot(
self, x, residual, next_need_quant_fusion_linear, self.bias)
self, x, residual, self.next_need_quant_fusion_linear,
self.bias)
return x, residual
x, residual = torch_npu.npu_rms_norm(x, self.weight,
self.variance_epsilon)