fix deepseek torchair recompile (#3679)

### What this PR does / why we need it?
PR #3624 fixed the precision of the DeepSeek torchair path, but it did not account for a torch.compile limitation: gating the code on the per-call result of `version_check()` causes recompilation. This PR removes that runtime check so the compiled graph stays stable. PR to main: #3678
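For context, here is a minimal, hypothetical sketch of the recompile mechanism (not code from this PR; the `flag` global stands in for the removed `version_check()` result). torch.compile installs guards on the Python values a traced function branches on, so a value that can change between calls forces a fresh compilation:

```python
import torch

flag = True  # stands in for the per-call version_check() result


@torch.compile
def forward(x):
    # Dynamo guards on the current value of `flag` and specializes
    # the traced graph to one branch.
    if flag:
        return x + 1
    return x - 1


forward(torch.ones(4))   # compiles once, guard: flag == True
flag = False
forward(torch.ones(4))   # guard fails -> the function is recompiled
```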


### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

Signed-off-by: hust17yixuan <303660421@qq.com>
Author: Wang Yixuan
Date: 2025-10-23 22:53:13 +08:00
Committed by: GitHub
Parent: d2d19a4c3c
Commit: d0086d432a


```diff
@@ -21,8 +21,6 @@ import torch
 from vllm.config import get_current_vllm_config
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm_ascend.utils import version_check
 
 _original_re_init = RMSNorm.__init__
@@ -38,9 +36,8 @@ def torchair_rmsnorm_init_(
                            dtype)
     vllm_config = get_current_vllm_config()
     self.bias = None
-    self.torch_npu_check = version_check()
     # quantization with anti_method m4 will generate none-zero norm bias
-    if self.torch_npu_check and vllm_config.quant_config is not None and \
+    if vllm_config.quant_config is not None and \
         any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()):
         self.bias = torch.nn.Parameter(torch.zeros(hidden_size),
                                        requires_grad=False)
@@ -59,7 +56,6 @@ def torchair_rmsnorm_forward_oot(
     """
     import torch_npu
-    torch_npu_check = version_check()
     from vllm_ascend.utils import is_310p
 
     if residual is not None:
@@ -72,11 +68,11 @@ def torchair_rmsnorm_forward_oot(
         else:
             x, _, residual = torch_npu.npu_add_rms_norm(
                 x, residual, self.weight, self.variance_epsilon)
-        if torch_npu_check and self.bias is not None:
+        if self.bias is not None:
             x.add_(self.bias)
         return x, residual
 
     x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)
-    if torch_npu_check and self.bias is not None:
+    if self.bias is not None:
         x.add_(self.bias)
     return x
```
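For readability, a sketch of how the patched forward path reads after this change, reconstructed from the hunks above. Only the lines shown in the diff are confirmed; the function signature and the elided `if residual` branch are assumptions:

```python
import torch
import torch_npu  # Ascend NPU fused kernels


def torchair_rmsnorm_forward_oot(self, x, residual=None):  # signature assumed
    from vllm_ascend.utils import is_310p
    if residual is not None:
        ...  # 310P-specific handling elided (not shown in the diff)
        x, _, residual = torch_npu.npu_add_rms_norm(
            x, residual, self.weight, self.variance_epsilon)
        # self.bias is fixed at init time, so this branch is stable
        # across calls and no longer triggers recompilation.
        if self.bias is not None:
            x.add_(self.bias)
        return x, residual

    x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)
    if self.bias is not None:
        x.add_(self.bias)
    return x
```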