fix deepseek torchair recompile (#3679)
### What this PR does / why we need it?
PR #3624 fixed the precision of DeepSeek torchair, but it did not account for a torch.compile limitation, which caused recompilation. This PR fixes that problem. PR to main: #3678

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

Signed-off-by: hust17yixuan <303660421@qq.com>
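For context, a minimal sketch (not taken from this PR; the class and parameter names are hypothetical) of the pattern the fix relies on: a condition that is resolved once in `__init__` is a per-instance constant, so a compiled `forward` never needs to re-guard on it, whereas re-evaluating a version check inside `forward` (the pre-fix behavior) forces torch.compile to recompile.

import torch

class PatchedRMSNorm(torch.nn.Module):
    """Hypothetical stand-in illustrating the recompile-safe pattern."""

    def __init__(self, hidden_size: int, eps: float = 1e-6,
                 has_bias: bool = False):
        super().__init__()
        self.variance_epsilon = eps
        self.weight = torch.nn.Parameter(torch.ones(hidden_size))
        # Resolved once at construction time (the PR's approach): the compiled
        # forward only ever sees a per-instance constant.
        self.bias = (torch.nn.Parameter(torch.zeros(hidden_size),
                                        requires_grad=False)
                     if has_bias else None)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        variance = x.pow(2).mean(-1, keepdim=True)
        x = x * torch.rsqrt(variance + self.variance_epsilon)
        x = x * self.weight
        # `self.bias is not None` is stable across calls; the pre-fix code
        # re-ran a version check here, which torch.compile had to guard on.
        if self.bias is not None:
            x = x + self.bias
        return x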
@@ -21,8 +21,6 @@ import torch
 from vllm.config import get_current_vllm_config
 from vllm.model_executor.layers.layernorm import RMSNorm
 
-from vllm_ascend.utils import version_check
-
 _original_re_init = RMSNorm.__init__
 
 
@@ -38,9 +36,8 @@ def torchair_rmsnorm_init_(
         dtype)
     vllm_config = get_current_vllm_config()
     self.bias = None
-    self.torch_npu_check = version_check()
     # quantization with anti_method m4 will generate none-zero norm bias
-    if self.torch_npu_check and vllm_config.quant_config is not None and \
+    if vllm_config.quant_config is not None and \
        any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()):
         self.bias = torch.nn.Parameter(torch.zeros(hidden_size),
                                        requires_grad=False)
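To illustrate the condition above: the patched `__init__` only allocates a norm bias when the quantization description contains a key matching "norm.bias" (produced by anti_method m4). A small illustration with made-up keys:

# Made-up quant_description keys, for illustration only.
quant_description = {
    "model.layers.0.input_layernorm.bias": "m4",       # contains "norm.bias"
    "model.layers.0.self_attn.q_proj.weight": "W8A8",  # does not match
}
needs_norm_bias = any("norm.bias" in name
                      for name in quant_description.keys())
print(needs_norm_bias)  # True -> create a zero-initialized, frozen bias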
@@ -59,7 +56,6 @@ def torchair_rmsnorm_forward_oot(
     """
 
     import torch_npu
-    torch_npu_check = version_check()
 
     from vllm_ascend.utils import is_310p
     if residual is not None:
@@ -72,11 +68,11 @@ def torchair_rmsnorm_forward_oot(
         else:
             x, _, residual = torch_npu.npu_add_rms_norm(
                 x, residual, self.weight, self.variance_epsilon)
-            if torch_npu_check and self.bias is not None:
+            if self.bias is not None:
                 x.add_(self.bias)
         return x, residual
 
     x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)
-    if torch_npu_check and self.bias is not None:
+    if self.bias is not None:
         x.add_(self.bias)
     return x
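Putting the hunks together, the patched forward now reads roughly as follows. This is a sketch mirroring the diff above, not the full function: it assumes an Ascend environment where torch_npu is importable and omits the is_310p branch that the hunk only shows partially.

import torch
import torch_npu  # Ascend NPU kernels; available only on NPU hosts

def torchair_rmsnorm_forward_oot(self, x, residual=None):
    # Sketch mirroring the diff; the 310P-specific branch is omitted.
    if residual is not None:
        # Fused residual-add + RMSNorm; the middle output is unused here.
        x, _, residual = torch_npu.npu_add_rms_norm(
            x, residual, self.weight, self.variance_epsilon)
        if self.bias is not None:  # constant per instance, compile-friendly
            x.add_(self.bias)
        return x, residual

    x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)
    if self.bias is not None:
        x.add_(self.bias)
    return x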