fix deepseek torchair precision (#3635)
### What this PR does / why we need it?
The precision of deepseek torchair was broken by #3465, due to its original patch of rmsnorm in torchair. This PR fixes the precision of deepseek torchair by patching `AscendRMSNorm` with the torchair rmsnorm implementation inside `torchair_ops_patch()`.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

Signed-off-by: hust17yixuan <303660421@qq.com>
```diff
@@ -229,10 +229,12 @@ def torchair_ops_patch():
     AscendDeepseekScalingRotaryEmbedding.__init__ = deepseek_rope_init_func  # type: ignore[method-assign]
     AscendDeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward  # type: ignore[method-assign]
 
+    AscendRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_  # type: ignore[method-assign]
+    AscendRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot  # type: ignore[method-assign]
 
     AscendSiluAndMul.forward_oot = torchair_activation.torchair_silu_and_mul_forward_oot  # type: ignore[method-assign]
     AscendVocabParallelEmbedding.forward = vocab_embedding_forward  # type: ignore[method-assign]
 
 
 def super_kernel(prefix: str, option: str, enabled: bool = True):
     return _super_kernel(prefix, option) if enabled else nullcontext()
```
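For readers unfamiliar with this patching style, below is a minimal sketch of the method-assignment (monkey-patching) pattern the hunk relies on. The `AscendRMSNorm` skeleton and the `torchair_rmsnorm_forward_oot` body here are illustrative stand-ins, not the actual vllm-ascend implementations; the real patch swaps in a torchair graph-mode NPU kernel rather than the eager reference math shown.

```python
# Minimal sketch of method-assignment patching, assuming a plain-PyTorch
# RMSNorm as a stand-in for the real Ascend/torchair implementations.
import torch


class AscendRMSNorm:
    def __init__(self, hidden_size: int, eps: float = 1e-6) -> None:
        self.weight = torch.ones(hidden_size)
        self.variance_epsilon = eps

    def forward_oot(self, x: torch.Tensor) -> torch.Tensor:
        # Placeholder; replaced at patch time, as in torchair_ops_patch().
        raise NotImplementedError("replaced at patch time")


def torchair_rmsnorm_forward_oot(self, x: torch.Tensor) -> torch.Tensor:
    # Reference RMSNorm in eager PyTorch; the actual torchair patch would
    # dispatch to a graph-mode friendly NPU kernel instead.
    variance = x.pow(2).mean(-1, keepdim=True)
    x = x * torch.rsqrt(variance + self.variance_epsilon)
    return x * self.weight


# The patch itself is a plain method assignment, applied once at setup time,
# mirroring the `AscendRMSNorm.forward_oot = ...` line in the diff above:
AscendRMSNorm.forward_oot = torchair_rmsnorm_forward_oot

if __name__ == "__main__":
    norm = AscendRMSNorm(8)
    out = norm.forward_oot(torch.randn(2, 8))
    print(out.shape)  # torch.Size([2, 8])
```

Because the assignment happens on the class, every existing and future `AscendRMSNorm` instance picks up the torchair forward path, which is why applying (or omitting) this patch changes model-wide numerics.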