Update torch-npu version to 2.7.1 (#3896)

### What this PR does / why we need it?
Upgrade torch-npu to the official release version 2.7.1


- vLLM version: v0.11.0
- vLLM main:
83f478bb19

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-10-31 17:16:31 +08:00
committed by GitHub
parent 5f6d1b3323
commit fcc9a0eaeb
15 changed files with 83 additions and 168 deletions

View File

@@ -551,8 +551,7 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
from vllm_ascend.ops.fused_moe.fused_moe import (AscendFusedMoE,
AscendSharedFusedMoE)
from vllm_ascend.ops.layernorm import (AscendGemmaRMSNorm,
AscendQuantRMSNorm, AscendRMSNorm)
from vllm_ascend.ops.layernorm import AscendGemmaRMSNorm, AscendRMSNorm
from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
AscendMergedColumnParallelLinear,
AscendQKVParallelLinear,
@@ -586,12 +585,6 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
"FusedMoE": AscendFusedMoE,
"SharedFusedMoE": AscendSharedFusedMoE,
}
if vllm_config is not None and \
vllm_config.quant_config is not None and \
any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()) and \
not version_check():
REGISTERED_ASCEND_OPS["RMSNorm"] = AscendQuantRMSNorm
mla_to_register = "MultiHeadLatentAttention" if vllm_version_is(
"0.11.0") else "MultiHeadLatentAttentionWrapper"
if vllm_config and vllm_config.model_config and vllm_config.model_config.use_mla:
@@ -791,21 +784,6 @@ def is_hierarchical_communication_enabled():
and os.getenv("HCCL_INTRA_PCIE_ENABLE", "") == "1")
@functools.cache
def version_check():
"""check if torch_npu version >= dev20250919"""
import re # noqa
torch_npu_version = torch_npu.version.__version__
date_pattern = r'dev(\d{8})'
match = re.search(date_pattern, torch_npu_version)
if match:
full_date = match.group(1)
if full_date >= "20250919":
return True
return False
def has_layer_idx(model_instance: torch.nn.Module) -> bool:
if model_instance is None:
return False