From 870a3f21cb56b344217908fbed764400df8087a8 Mon Sep 17 00:00:00 2001 From: Wang Yixuan <88923622+hust17yixuan@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:39:34 +0800 Subject: [PATCH] [BugFix] deepseek torchair adapt for torch_npu version (#3862) ### What this PR does / why we need it? To adapt to the torch_npu version and avoid the precision problem of torchair deepseek. Different torch_npu versions may result in different branches in the ops register: the rms_norm op has two branches according to the version check. This PR unifies rms_norm in torchair by patching quant_rms_norm to rms_norm to fix the accuracy issue in the torchair scenario. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac Signed-off-by: hust17yixuan <303660421@qq.com> --- vllm_ascend/torchair/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/torchair/utils.py b/vllm_ascend/torchair/utils.py index 45b9e220..19367038 100644 --- a/vllm_ascend/torchair/utils.py +++ b/vllm_ascend/torchair/utils.py @@ -212,7 +212,7 @@ def torchair_quant_method_register(): def torchair_ops_patch(): from vllm_ascend.ops.activation import AscendSiluAndMul - from vllm_ascend.ops.layernorm import AscendRMSNorm + from vllm_ascend.ops.layernorm import AscendQuantRMSNorm, AscendRMSNorm from vllm_ascend.ops.rotary_embedding import ( AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding) from vllm_ascend.ops.vocab_parallel_embedding import \ @@ -234,6 +234,9 @@ def torchair_ops_patch(): AscendRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_ # type: ignore[method-assign] AscendRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign] + AscendQuantRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_ # type: ignore[method-assign] + AscendQuantRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign] + 
AscendSiluAndMul.forward_oot = torchair_activation.torchair_silu_and_mul_forward_oot # type: ignore[method-assign] AscendVocabParallelEmbedding.forward = vocab_embedding_forward # type: ignore[method-assign]