[BugFix] Fix deepseek torchair recompile (#3678)

### What this PR does / why we need it?
PR #3624 fixed the precision of deepseek torchair, but did not account for
the constraints of torch compile, which resulted in recompilation. This PR
fixes that problem.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: hust17yixuan <303660421@qq.com>
This commit is contained in:
Wang Yixuan
2025-10-23 22:53:01 +08:00
committed by GitHub
parent 3366d47694
commit a7b40b09eb

View File

@@ -21,8 +21,6 @@ import torch
from vllm.config import get_current_vllm_config
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm_ascend.utils import version_check
_original_re_init = RMSNorm.__init__
@@ -38,9 +36,8 @@ def torchair_rmsnorm_init_(
dtype)
vllm_config = get_current_vllm_config()
self.bias = None
self.torch_npu_check = version_check()
# quantization with anti_method m4 will generate none-zero norm bias
if self.torch_npu_check and vllm_config.quant_config is not None and \
if vllm_config.quant_config is not None and \
any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()):
self.bias = torch.nn.Parameter(torch.zeros(hidden_size),
requires_grad=False)
@@ -59,7 +56,6 @@ def torchair_rmsnorm_forward_oot(
"""
import torch_npu
torch_npu_check = version_check()
from vllm_ascend.utils import is_310p
if residual is not None:
@@ -72,11 +68,11 @@ def torchair_rmsnorm_forward_oot(
else:
x, _, residual = torch_npu.npu_add_rms_norm(
x, residual, self.weight, self.variance_epsilon)
if torch_npu_check and self.bias is not None:
if self.bias is not None:
x.add_(self.bias)
return x, residual
x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)
if torch_npu_check and self.bias is not None:
if self.bias is not None:
x.add_(self.bias)
return x