Update torch-npu version to 2.7.1 (#3896)
### What this PR does / why we need it?
Upgrade torch-npu to the official 2.7.1 release.
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
---------
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -551,8 +551,7 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
|
||||
from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
|
||||
from vllm_ascend.ops.fused_moe.fused_moe import (AscendFusedMoE,
|
||||
AscendSharedFusedMoE)
|
||||
from vllm_ascend.ops.layernorm import (AscendGemmaRMSNorm,
|
||||
AscendQuantRMSNorm, AscendRMSNorm)
|
||||
from vllm_ascend.ops.layernorm import AscendGemmaRMSNorm, AscendRMSNorm
|
||||
from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
|
||||
AscendMergedColumnParallelLinear,
|
||||
AscendQKVParallelLinear,
|
||||
@@ -586,12 +585,6 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
|
||||
"FusedMoE": AscendFusedMoE,
|
||||
"SharedFusedMoE": AscendSharedFusedMoE,
|
||||
}
|
||||
|
||||
if vllm_config is not None and \
|
||||
vllm_config.quant_config is not None and \
|
||||
any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()) and \
|
||||
not version_check():
|
||||
REGISTERED_ASCEND_OPS["RMSNorm"] = AscendQuantRMSNorm
|
||||
mla_to_register = "MultiHeadLatentAttention" if vllm_version_is(
|
||||
"0.11.0") else "MultiHeadLatentAttentionWrapper"
|
||||
if vllm_config and vllm_config.model_config and vllm_config.model_config.use_mla:
|
||||
@@ -791,21 +784,6 @@ def is_hierarchical_communication_enabled():
|
||||
and os.getenv("HCCL_INTRA_PCIE_ENABLE", "") == "1")
|
||||
|
||||
|
||||
@functools.cache
def version_check():
    """Report whether torch_npu is a dev build tagged dev20250919 or later.

    The version string is read from ``torch_npu.version.__version__`` and
    searched for a ``dev`` tag followed by eight digits. A version string
    without such a tag yields ``False``. The result is memoized by
    ``functools.cache``, so the version is inspected only on the first call.

    Returns:
        bool: ``True`` when a dev tag is present and its eight-digit stamp
        is at least ``20250919``; ``False`` otherwise.
    """
    import re  # noqa

    dev_tag = re.search(r'dev(\d{8})', torch_npu.version.__version__)
    # Fixed-width digit strings compare lexicographically in numeric order,
    # so a plain string comparison is sufficient here.
    return dev_tag is not None and dev_tag.group(1) >= "20250919"
|
||||
|
||||
|
||||
def has_layer_idx(model_instance: torch.nn.Module) -> bool:
|
||||
if model_instance is None:
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user