From b323be9fe415280c12408202fdd1c6a08adbd20d Mon Sep 17 00:00:00 2001 From: Wang Yixuan <88923622+hust17yixuan@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:44:44 +0800 Subject: [PATCH] deepseek torchair adapt for torch_npu version (#3876) ### What this PR does / why we need it? Adapt to the torch_npu version to avoid the precision problem of torchair deepseek. The torch_npu version may result in different branches in the ops register: the rms_norm op has two branches depending on the version check. This PR unifies the rms_norm behavior in torchair via the patch method. #3862 Signed-off-by: hust17yixuan <303660421@qq.com> --- vllm_ascend/torchair/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/torchair/utils.py b/vllm_ascend/torchair/utils.py index af61c65..97fc3b1 100644 --- a/vllm_ascend/torchair/utils.py +++ b/vllm_ascend/torchair/utils.py @@ -210,7 +210,7 @@ def torchair_quant_method_register(): def torchair_ops_patch(): from vllm_ascend.ops.activation import AscendSiluAndMul - from vllm_ascend.ops.layernorm import AscendRMSNorm + from vllm_ascend.ops.layernorm import AscendQuantRMSNorm, AscendRMSNorm from vllm_ascend.ops.rotary_embedding import ( AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding) from vllm_ascend.ops.vocab_parallel_embedding import \ @@ -232,6 +232,9 @@ def torchair_ops_patch(): AscendRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_ # type: ignore[method-assign] AscendRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign] + AscendQuantRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_ # type: ignore[method-assign] + AscendQuantRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign] + AscendSiluAndMul.forward_oot = torchair_activation.torchair_silu_and_mul_forward_oot # type: ignore[method-assign] AscendVocabParallelEmbedding.forward = vocab_embedding_forward # type: ignore[method-assign]