[0.11.0] Cherry-pick PTA upgrade change (#3940)

This PR cherry-picks two commits from main to upgrade torch-npu to the official 2.7.1 release. --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-10-31 22:14:26 +08:00
parent 3d81ea03ed
commit 8a7154001e
16 changed files with 93 additions and 171 deletions
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -510,8 +510,7 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
    from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
    from vllm_ascend.ops.common_fused_moe import (AscendFusedMoE,
                                                  AscendSharedFusedMoE)
-    from vllm_ascend.ops.layernorm import (AscendGemmaRMSNorm,
-                                           AscendQuantRMSNorm, AscendRMSNorm)
+    from vllm_ascend.ops.layernorm import AscendGemmaRMSNorm, AscendRMSNorm
    from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
                                        AscendMergedColumnParallelLinear,
                                        AscendQKVParallelLinear,
@@ -547,12 +546,6 @@ def register_ascend_customop(vllm_config: Optional[VllmConfig] = None):
        "MultiHeadLatentAttention": AscendMultiHeadLatentAttention,
    }

-    if vllm_config is not None and \
-        vllm_config.quant_config is not None and \
-        any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()) and \
-            not version_check():
-        REGISTERED_ASCEND_OPS["RMSNorm"] = AscendQuantRMSNorm
-
    for name, op_cls in REGISTERED_ASCEND_OPS.items():
        CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)

@@ -743,21 +736,6 @@ def is_hierarchical_communication_enabled():
            and os.getenv("HCCL_INTRA_PCIE_ENABLE", "") == "1")


-@functools.cache
-def version_check():
-    """check if torch_npu version >= dev20250919"""
-    import re
-    torch_npu_version = torch_npu.version.__version__
-    date_pattern = r'dev(\d{8})'
-
-    match = re.search(date_pattern, torch_npu_version)
-    if match:
-        full_date = match.group(1)
-        if full_date >= "20250919":
-            return True
-    return False
-
-
 def has_layer_idx(model_instance: torch.nn.Module) -> bool:
    if model_instance is None:
        return False