[BugFix] deepseek torchair adapt for torch_npu version (#3862)

### What this PR does / why we need it?
Adapt to the installed torch_npu version to avoid a precision problem
with DeepSeek in torchair mode. Depending on the torch_npu version, op
registration can take different branches: the rms_norm op has two branches
selected by version_check. This PR unifies rms_norm in torchair by
patching quant_rms_norm to rms_norm, which fixes the accuracy issue in the torchair scenario.

- vLLM version: v0.11.0rc3
- vLLM main:
83f478bb19

Signed-off-by: hust17yixuan <303660421@qq.com>
This commit is contained in:
Wang Yixuan
2025-10-29 22:39:34 +08:00
committed by GitHub
parent 4a2ab13743
commit 870a3f21cb

View File

@@ -212,7 +212,7 @@ def torchair_quant_method_register():
def torchair_ops_patch():
from vllm_ascend.ops.activation import AscendSiluAndMul
from vllm_ascend.ops.layernorm import AscendRMSNorm
from vllm_ascend.ops.layernorm import AscendQuantRMSNorm, AscendRMSNorm
from vllm_ascend.ops.rotary_embedding import (
AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding)
from vllm_ascend.ops.vocab_parallel_embedding import \
@@ -234,6 +234,9 @@ def torchair_ops_patch():
AscendRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_ # type: ignore[method-assign]
AscendRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign]
AscendQuantRMSNorm.__init__ = torchair_layernorm.torchair_rmsnorm_init_ # type: ignore[method-assign]
AscendQuantRMSNorm.forward_oot = torchair_layernorm.torchair_rmsnorm_forward_oot # type: ignore[method-assign]
AscendSiluAndMul.forward_oot = torchair_activation.torchair_silu_and_mul_forward_oot # type: ignore[method-assign]
AscendVocabParallelEmbedding.forward = vocab_embedding_forward # type: ignore[method-assign]