Update torch-npu version to 2.7.1 (#3896)

### What this PR does / why we need it?

Upgrade torch-npu to the official release version 2.7.1.

- vLLM version: v0.11.0
- vLLM main: 83f478bb19

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-10-31 17:16:31 +08:00
parent 5f6d1b3323
commit fcc9a0eaeb
15 changed files with 83 additions and 168 deletions
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -551,8 +551,7 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
    from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
    from vllm_ascend.ops.fused_moe.fused_moe import (AscendFusedMoE,
                                                     AscendSharedFusedMoE)
-    from vllm_ascend.ops.layernorm import (AscendGemmaRMSNorm,
-                                           AscendQuantRMSNorm, AscendRMSNorm)
+    from vllm_ascend.ops.layernorm import AscendGemmaRMSNorm, AscendRMSNorm
    from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
                                        AscendMergedColumnParallelLinear,
                                        AscendQKVParallelLinear,
@@ -586,12 +585,6 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
        "FusedMoE": AscendFusedMoE,
        "SharedFusedMoE": AscendSharedFusedMoE,
    }
-
-    if vllm_config is not None and \
-        vllm_config.quant_config is not None and \
-        any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()) and \
-            not version_check():
-        REGISTERED_ASCEND_OPS["RMSNorm"] = AscendQuantRMSNorm
    mla_to_register = "MultiHeadLatentAttention" if vllm_version_is(
        "0.11.0") else "MultiHeadLatentAttentionWrapper"
    if vllm_config and vllm_config.model_config and vllm_config.model_config.use_mla:
@@ -791,21 +784,6 @@ def is_hierarchical_communication_enabled():
            and os.getenv("HCCL_INTRA_PCIE_ENABLE", "") == "1")


-@functools.cache
-def version_check():
-    """check if torch_npu version >= dev20250919"""
-    import re  # noqa
-    torch_npu_version = torch_npu.version.__version__
-    date_pattern = r'dev(\d{8})'
-
-    match = re.search(date_pattern, torch_npu_version)
-    if match:
-        full_date = match.group(1)
-        if full_date >= "20250919":
-            return True
-    return False
-
-
 def has_layer_idx(model_instance: torch.nn.Module) -> bool:
    if model_instance is None:
        return False